import pandas as pd
%matplotlib inline
import matplotlib.pyplot as plt
import numpy as np
import warnings
import keras
from keras.layers import Dense
from keras.models import Sequential
import seaborn as sns
from keras import layers
import statsmodels.api as sm
import statsmodels.formula.api as smf
from sklearn.preprocessing import MinMaxScaler
import plotly.graph_objects as go
from tensorflow.keras import regularizers
import tensorflow as tf
from keras.preprocessing.sequence import TimeseriesGenerator
from io import StringIO
from keras.layers import LSTM, Dense
import chart_studio.tools as tls
from scipy import stats
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.preprocessing import MinMaxScaler
from sklearn.ensemble import RandomForestRegressor
pd.set_option('display.max_columns', None)
# Load the static project dataset from Excel; raw string avoids backslash-escape issues in the path.
df = pd.read_excel (r'Project_static.xlsx')
df.head()
| Date | Gold_ETF | S&P_500 | Silver | Platinum | Palladium | Crude_oil | Euro_USD_Exchange_Rate | EGO | AU | ABX | BVN | CPI | Covid_deaths | Interest_rate_USA | Interest_rate_UK | Disease_volitaly_tracker | Gold_price_trend | Covid_economic_impact | Stock_maket_news | Stock_market_crash | silver_price_trend | Palladium_price_trend | Platinum_price_trend | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 2019-06-21 | 1396.2 | 2950.46 | 15.273 | 811.5 | 1491.3 | 57.43 | 1.1296 | 5.43 | 16.45 | 19.182671 | 16.388355 | 255.361 | 0 | 0.74 | 0.38 | 0.00 | 32 | 0 | 0 | 6 | 8 | 14 | 56 |
| 1 | 2019-06-24 | 1414.3 | 2945.35 | 15.368 | 812.2 | 1520.4 | 57.90 | 1.1385 | 5.49 | 17.11 | 19.975727 | 16.933641 | 255.361 | 0 | 0.71 | 0.36 | 1.78 | 35 | 0 | 49 | 4 | 8 | 19 | 52 |
| 2 | 2019-06-25 | 1414.9 | 2917.38 | 15.291 | 810.3 | 1521.8 | 57.83 | 1.1400 | 5.45 | 16.65 | 19.667839 | 16.636213 | 255.361 | 0 | 0.73 | 0.35 | 0.00 | 35 | 0 | 49 | 4 | 8 | 19 | 52 |
| 3 | 2019-06-26 | 1411.6 | 2913.78 | 15.283 | 817.1 | 1518.0 | 59.38 | 1.1370 | 5.70 | 17.07 | 19.415928 | 16.477583 | 255.361 | 0 | 0.74 | 0.35 | 0.00 | 35 | 0 | 49 | 4 | 8 | 19 | 52 |
| 4 | 2019-06-27 | 1408.4 | 2924.92 | 15.205 | 812.6 | 1532.2 | 59.43 | 1.1375 | 5.75 | 17.21 | 19.210663 | 16.398270 | 255.361 | 0 | 0.71 | 0.36 | 0.55 | 35 | 0 | 49 | 4 | 8 | 19 | 52 |
# Column dtypes and non-null counts (738 rows x 24 columns, no missing values per the output below).
df.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 738 entries, 0 to 737 Data columns (total 24 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 Date 738 non-null datetime64[ns] 1 Gold_ETF 738 non-null float64 2 S&P_500 738 non-null float64 3 Silver 738 non-null float64 4 Platinum 738 non-null float64 5 Palladium 738 non-null float64 6 Crude_oil 738 non-null float64 7 Euro_USD_Exchange_Rate 738 non-null float64 8 EGO 738 non-null float64 9 AU 738 non-null float64 10 ABX 738 non-null float64 11 BVN 738 non-null float64 12 CPI 738 non-null float64 13 Covid_deaths 738 non-null int64 14 Interest_rate_USA 738 non-null float64 15 Interest_rate_UK 738 non-null float64 16 Disease_volitaly_tracker 738 non-null float64 17 Gold_price_trend 738 non-null int64 18 Covid_economic_impact 738 non-null int64 19 Stock_maket_news 738 non-null int64 20 Stock_market_crash 738 non-null int64 21 silver_price_trend 738 non-null int64 22 Palladium_price_trend 738 non-null int64 23 Platinum_price_trend 738 non-null int64 dtypes: datetime64[ns](1), float64(15), int64(8) memory usage: 138.5 KB
# (rows, columns) of the raw dataset
df.shape
(738, 24)
Time trend plots of each metal price series
# Time-trend line plots of each metal price series against Date.
plt.rcParams["figure.figsize"] = (20,5)
df.plot(x='Date',y='Gold_ETF')
<AxesSubplot:xlabel='Date'>
plt.rcParams["figure.figsize"] = (20,5)
df.plot(x='Date',y='Silver')
<AxesSubplot:xlabel='Date'>
plt.rcParams["figure.figsize"] = (20,5)
df.plot(x='Date',y='Palladium')
<AxesSubplot:xlabel='Date'>
plt.rcParams["figure.figsize"] = (20,5)
df.plot(x='Date',y='Platinum')
<AxesSubplot:xlabel='Date'>
# For time-series work, index a copy of the frame by Date (df itself is left unchanged).
new_df = df.set_index('Date')
new_df.head()
| Gold_ETF | S&P_500 | Silver | Platinum | Palladium | Crude_oil | Euro_USD_Exchange_Rate | EGO | AU | ABX | BVN | CPI | Covid_deaths | Interest_rate_USA | Interest_rate_UK | Disease_volitaly_tracker | Gold_price_trend | Covid_economic_impact | Stock_maket_news | Stock_market_crash | silver_price_trend | Palladium_price_trend | Platinum_price_trend | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| Date | |||||||||||||||||||||||
| 2019-06-21 | 1396.2 | 2950.46 | 15.273 | 811.5 | 1491.3 | 57.43 | 1.1296 | 5.43 | 16.45 | 19.182671 | 16.388355 | 255.361 | 0 | 0.74 | 0.38 | 0.00 | 32 | 0 | 0 | 6 | 8 | 14 | 56 |
| 2019-06-24 | 1414.3 | 2945.35 | 15.368 | 812.2 | 1520.4 | 57.90 | 1.1385 | 5.49 | 17.11 | 19.975727 | 16.933641 | 255.361 | 0 | 0.71 | 0.36 | 1.78 | 35 | 0 | 49 | 4 | 8 | 19 | 52 |
| 2019-06-25 | 1414.9 | 2917.38 | 15.291 | 810.3 | 1521.8 | 57.83 | 1.1400 | 5.45 | 16.65 | 19.667839 | 16.636213 | 255.361 | 0 | 0.73 | 0.35 | 0.00 | 35 | 0 | 49 | 4 | 8 | 19 | 52 |
| 2019-06-26 | 1411.6 | 2913.78 | 15.283 | 817.1 | 1518.0 | 59.38 | 1.1370 | 5.70 | 17.07 | 19.415928 | 16.477583 | 255.361 | 0 | 0.74 | 0.35 | 0.00 | 35 | 0 | 49 | 4 | 8 | 19 | 52 |
| 2019-06-27 | 1408.4 | 2924.92 | 15.205 | 812.6 | 1532.2 | 59.43 | 1.1375 | 5.75 | 17.21 | 19.210663 | 16.398270 | 255.361 | 0 | 0.71 | 0.36 | 0.55 | 35 | 0 | 49 | 4 | 8 | 19 | 52 |
# Pairwise Pearson correlation heatmap across all numeric columns, then the raw matrix.
plt.rcParams["figure.figsize"] = (20,10)
sns.heatmap(new_df.corr())
<AxesSubplot:>
new_df.corr()
| Gold_ETF | S&P_500 | Silver | Platinum | Palladium | Crude_oil | Euro_USD_Exchange_Rate | EGO | AU | ABX | BVN | CPI | Covid_deaths | Interest_rate_USA | Interest_rate_UK | Disease_volitaly_tracker | Gold_price_trend | Covid_economic_impact | Stock_maket_news | Stock_market_crash | silver_price_trend | Palladium_price_trend | Platinum_price_trend | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| Gold_ETF | 1.000000 | 0.594901 | 0.808265 | 0.381407 | 0.614011 | 0.278021 | 0.377113 | 0.705257 | 0.470091 | 0.681161 | -0.495626 | 0.518520 | 0.491924 | -0.615498 | 0.193342 | 0.301835 | 0.290943 | 0.204087 | -0.090103 | 0.001961 | 0.314408 | 0.155644 | 0.448615 |
| S&P_500 | 0.594901 | 1.000000 | 0.720232 | 0.643814 | 0.471741 | 0.704071 | 0.305298 | 0.298287 | -0.178688 | -0.098130 | -0.535643 | 0.785443 | 0.428999 | -0.409345 | 0.234009 | -0.141260 | -0.167483 | -0.341203 | -0.149370 | -0.185627 | 0.070664 | 0.102815 | 0.456565 |
| Silver | 0.808265 | 0.720232 | 1.000000 | 0.766384 | 0.709129 | 0.350812 | 0.686743 | 0.756361 | 0.296655 | 0.331152 | -0.213219 | 0.420916 | 0.409257 | -0.593756 | -0.045910 | 0.006476 | 0.082926 | -0.167328 | -0.140539 | -0.156577 | 0.346463 | 0.064248 | 0.485275 |
| Platinum | 0.381407 | 0.643814 | 0.766384 | 1.000000 | 0.672032 | 0.406597 | 0.585510 | 0.500324 | 0.024563 | -0.111983 | -0.067238 | 0.312500 | 0.279432 | -0.302529 | -0.078677 | -0.184724 | -0.092614 | -0.397850 | -0.089544 | -0.142801 | 0.227960 | 0.162583 | 0.494881 |
| Palladium | 0.614011 | 0.471741 | 0.709129 | 0.672032 | 1.000000 | 0.123400 | 0.544304 | 0.583258 | 0.235065 | 0.336540 | -0.249425 | 0.214436 | 0.218063 | -0.499165 | -0.102531 | 0.206928 | 0.178303 | 0.070769 | 0.116724 | 0.078800 | 0.214654 | 0.348376 | 0.390017 |
| Crude_oil | 0.278021 | 0.704071 | 0.350812 | 0.406597 | 0.123400 | 1.000000 | -0.250994 | -0.061457 | -0.370132 | -0.313191 | -0.261118 | 0.901504 | 0.035913 | 0.265418 | 0.707314 | -0.310902 | -0.154122 | -0.569342 | -0.165159 | -0.146092 | -0.016920 | 0.239543 | 0.447860 |
| Euro_USD_Exchange_Rate | 0.377113 | 0.305298 | 0.686743 | 0.585510 | 0.544304 | -0.250994 | 1.000000 | 0.646324 | 0.267023 | 0.182646 | 0.052024 | -0.231272 | 0.392849 | -0.702342 | -0.633900 | 0.062029 | 0.025829 | 0.023311 | -0.047962 | -0.115300 | 0.260373 | -0.168014 | 0.144593 |
| EGO | 0.705257 | 0.298287 | 0.756361 | 0.500324 | 0.583258 | -0.061457 | 0.646324 | 1.000000 | 0.640633 | 0.609477 | 0.032994 | 0.016611 | 0.406305 | -0.612153 | -0.268603 | 0.162893 | 0.205725 | 0.101405 | -0.107649 | -0.054078 | 0.322836 | 0.023404 | 0.372599 |
| AU | 0.470091 | -0.178688 | 0.296655 | 0.024563 | 0.235065 | -0.370132 | 0.267023 | 0.640633 | 1.000000 | 0.835432 | 0.175537 | -0.292529 | 0.098137 | -0.429588 | -0.250901 | 0.191479 | 0.320691 | 0.317865 | -0.113029 | -0.094936 | 0.256805 | 0.029301 | 0.099349 |
| ABX | 0.681161 | -0.098130 | 0.331152 | -0.111983 | 0.336540 | -0.313191 | 0.182646 | 0.609477 | 0.835432 | 1.000000 | -0.142331 | -0.090895 | 0.203072 | -0.472753 | -0.035274 | 0.416275 | 0.364875 | 0.538219 | -0.053612 | 0.031585 | 0.224481 | 0.036720 | 0.091935 |
| BVN | -0.495626 | -0.535643 | -0.213219 | -0.067238 | -0.249425 | -0.261118 | 0.052024 | 0.032994 | 0.175537 | -0.142331 | 1.000000 | -0.579559 | -0.437015 | 0.367125 | -0.330305 | -0.434805 | -0.136994 | -0.384940 | -0.069618 | -0.238850 | -0.066343 | -0.108430 | -0.248229 |
| CPI | 0.518520 | 0.785443 | 0.420916 | 0.312500 | 0.214436 | 0.901504 | -0.231272 | 0.016611 | -0.292529 | -0.090895 | -0.579559 | 1.000000 | 0.225116 | 0.056582 | 0.751196 | -0.046183 | -0.024087 | -0.275264 | -0.113879 | -0.022940 | 0.032789 | 0.231679 | 0.473510 |
| Covid_deaths | 0.491924 | 0.428999 | 0.409257 | 0.279432 | 0.218063 | 0.035913 | 0.392849 | 0.406305 | 0.098137 | 0.203072 | -0.437015 | 0.225116 | 1.000000 | -0.574236 | -0.116223 | 0.228420 | 0.113809 | 0.243240 | -0.071350 | 0.038397 | 0.253668 | 0.030649 | 0.302233 |
| Interest_rate_USA | -0.615498 | -0.409345 | -0.593756 | -0.302529 | -0.499165 | 0.265418 | -0.702342 | -0.612153 | -0.429588 | -0.472753 | 0.367125 | 0.056582 | -0.574236 | 1.000000 | 0.564132 | -0.266841 | -0.169380 | -0.313442 | -0.003753 | 0.043499 | -0.229509 | 0.002369 | -0.157067 |
| Interest_rate_UK | 0.193342 | 0.234009 | -0.045910 | -0.078677 | -0.102531 | 0.707314 | -0.633900 | -0.268603 | -0.250901 | -0.035274 | -0.330305 | 0.751196 | -0.116223 | 0.564132 | 1.000000 | 0.024327 | 0.026793 | -0.111439 | -0.088812 | 0.073137 | -0.052128 | 0.181968 | 0.244112 |
| Disease_volitaly_tracker | 0.301835 | -0.141260 | 0.006476 | -0.184724 | 0.206928 | -0.310902 | 0.062029 | 0.162893 | 0.191479 | 0.416275 | -0.434805 | -0.046183 | 0.228420 | -0.266841 | 0.024327 | 1.000000 | 0.520181 | 0.734502 | 0.259482 | 0.576298 | 0.223547 | 0.191929 | 0.153072 |
| Gold_price_trend | 0.290943 | -0.167483 | 0.082926 | -0.092614 | 0.178303 | -0.154122 | 0.025829 | 0.205725 | 0.320691 | 0.364875 | -0.136994 | -0.024087 | 0.113809 | -0.169380 | 0.026793 | 0.520181 | 1.000000 | 0.485080 | 0.320490 | 0.574453 | 0.544444 | 0.568225 | 0.453146 |
| Covid_economic_impact | 0.204087 | -0.341203 | -0.167328 | -0.397850 | 0.070769 | -0.569342 | 0.023311 | 0.101405 | 0.317865 | 0.538219 | -0.384940 | -0.275264 | 0.243240 | -0.313442 | -0.111439 | 0.734502 | 0.485080 | 1.000000 | 0.144829 | 0.431179 | 0.143856 | 0.047053 | -0.039523 |
| Stock_maket_news | -0.090103 | -0.149370 | -0.140539 | -0.089544 | 0.116724 | -0.165159 | -0.047962 | -0.107649 | -0.113029 | -0.053612 | -0.069618 | -0.113879 | -0.071350 | -0.003753 | -0.088812 | 0.259482 | 0.320490 | 0.144829 | 1.000000 | 0.491632 | 0.206859 | 0.371772 | 0.064993 |
| Stock_market_crash | 0.001961 | -0.185627 | -0.156577 | -0.142801 | 0.078800 | -0.146092 | -0.115300 | -0.054078 | -0.094936 | 0.031585 | -0.238850 | -0.022940 | 0.038397 | 0.043499 | 0.073137 | 0.576298 | 0.574453 | 0.431179 | 0.491632 | 1.000000 | 0.215842 | 0.431137 | 0.165397 |
| silver_price_trend | 0.314408 | 0.070664 | 0.346463 | 0.227960 | 0.214654 | -0.016920 | 0.260373 | 0.322836 | 0.256805 | 0.224481 | -0.066343 | 0.032789 | 0.253668 | -0.229509 | -0.052128 | 0.223547 | 0.544444 | 0.143856 | 0.206859 | 0.215842 | 1.000000 | 0.261089 | 0.451579 |
| Palladium_price_trend | 0.155644 | 0.102815 | 0.064248 | 0.162583 | 0.348376 | 0.239543 | -0.168014 | 0.023404 | 0.029301 | 0.036720 | -0.108430 | 0.231679 | 0.030649 | 0.002369 | 0.181968 | 0.191929 | 0.568225 | 0.047053 | 0.371772 | 0.431137 | 0.261089 | 1.000000 | 0.509484 |
| Platinum_price_trend | 0.448615 | 0.456565 | 0.485275 | 0.494881 | 0.390017 | 0.447860 | 0.144593 | 0.372599 | 0.099349 | 0.091935 | -0.248229 | 0.473510 | 0.302233 | -0.157067 | 0.244112 | 0.153072 | 0.453146 | -0.039523 | 0.064993 | 0.165397 | 0.451579 | 0.509484 | 1.000000 |
def returns(col):
    """Simple one-period return of a price series: (p_t - p_{t-1}) / p_{t-1}.

    The first element is NaN because there is no prior observation.
    """
    previous = col.shift(1)
    return (col - previous) / previous
# Daily simple returns for each metal (first row becomes NaN due to the shift).
new_df['Gold_ETF_Return'] = returns(new_df['Gold_ETF'])
new_df['Silver_Return'] = returns(new_df['Silver'])
new_df['Platinum_Return'] = returns(new_df['Platinum'])
new_df['Palladium_Return'] = returns(new_df['Palladium'])
new_df.head()
| Gold_ETF | S&P_500 | Silver | Platinum | Palladium | Crude_oil | Euro_USD_Exchange_Rate | EGO | AU | ABX | BVN | CPI | Covid_deaths | Interest_rate_USA | Interest_rate_UK | Disease_volitaly_tracker | Gold_price_trend | Covid_economic_impact | Stock_maket_news | Stock_market_crash | silver_price_trend | Palladium_price_trend | Platinum_price_trend | Gold_ETF_Return | Silver_Return | Platinum_Return | Palladium_Return | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| Date | |||||||||||||||||||||||||||
| 2019-06-21 | 1396.2 | 2950.46 | 15.273 | 811.5 | 1491.3 | 57.43 | 1.1296 | 5.43 | 16.45 | 19.182671 | 16.388355 | 255.361 | 0 | 0.74 | 0.38 | 0.00 | 32 | 0 | 0 | 6 | 8 | 14 | 56 | NaN | NaN | NaN | NaN |
| 2019-06-24 | 1414.3 | 2945.35 | 15.368 | 812.2 | 1520.4 | 57.90 | 1.1385 | 5.49 | 17.11 | 19.975727 | 16.933641 | 255.361 | 0 | 0.71 | 0.36 | 1.78 | 35 | 0 | 49 | 4 | 8 | 19 | 52 | 0.012964 | 0.006220 | 0.000863 | 0.019513 |
| 2019-06-25 | 1414.9 | 2917.38 | 15.291 | 810.3 | 1521.8 | 57.83 | 1.1400 | 5.45 | 16.65 | 19.667839 | 16.636213 | 255.361 | 0 | 0.73 | 0.35 | 0.00 | 35 | 0 | 49 | 4 | 8 | 19 | 52 | 0.000424 | -0.005010 | -0.002339 | 0.000921 |
| 2019-06-26 | 1411.6 | 2913.78 | 15.283 | 817.1 | 1518.0 | 59.38 | 1.1370 | 5.70 | 17.07 | 19.415928 | 16.477583 | 255.361 | 0 | 0.74 | 0.35 | 0.00 | 35 | 0 | 49 | 4 | 8 | 19 | 52 | -0.002332 | -0.000523 | 0.008392 | -0.002497 |
| 2019-06-27 | 1408.4 | 2924.92 | 15.205 | 812.6 | 1532.2 | 59.43 | 1.1375 | 5.75 | 17.21 | 19.210663 | 16.398270 | 255.361 | 0 | 0.71 | 0.36 | 0.55 | 35 | 0 | 49 | 4 | 8 | 19 | 52 | -0.002267 | -0.005104 | -0.005507 | 0.009354 |
# The only NaNs are the first-row returns introduced by shift(1) above.
new_df.isnull().sum()
Gold_ETF 0 S&P_500 0 Silver 0 Platinum 0 Palladium 0 Crude_oil 0 Euro_USD_Exchange_Rate 0 EGO 0 AU 0 ABX 0 BVN 0 CPI 0 Covid_deaths 0 Interest_rate_USA 0 Interest_rate_UK 0 Disease_volitaly_tracker 0 Gold_price_trend 0 Covid_economic_impact 0 Stock_maket_news 0 Stock_market_crash 0 silver_price_trend 0 Palladium_price_trend 0 Platinum_price_trend 0 Gold_ETF_Return 1 Silver_Return 1 Platinum_Return 1 Palladium_Return 1 dtype: int64
# Drop the single NaN row (the first observation), then re-verify no NaNs remain.
new_df.dropna(inplace=True)
new_df.isnull().sum()
Gold_ETF 0 S&P_500 0 Silver 0 Platinum 0 Palladium 0 Crude_oil 0 Euro_USD_Exchange_Rate 0 EGO 0 AU 0 ABX 0 BVN 0 CPI 0 Covid_deaths 0 Interest_rate_USA 0 Interest_rate_UK 0 Disease_volitaly_tracker 0 Gold_price_trend 0 Covid_economic_impact 0 Stock_maket_news 0 Stock_market_crash 0 silver_price_trend 0 Palladium_price_trend 0 Platinum_price_trend 0 Gold_ETF_Return 0 Silver_Return 0 Platinum_Return 0 Palladium_Return 0 dtype: int64
# Accuracy metrics
def forecast_accuracy(forecast, actual):
    """Return point-forecast error metrics as a dict.

    Metrics: ME (mean signed error / bias), MAE, RMSE and MSE.
    `forecast` and `actual` are aligned numeric arrays of equal length.
    """
    err = forecast - actual
    mse = np.mean(err ** 2)
    return {
        'me': np.mean(err),
        'mae': np.mean(np.abs(err)),
        'rmse': np.sqrt(mse),
        'mse': mse,
    }
# Per-metal working copies so each target's pipeline can drop/transform columns independently.
new_df_gold = new_df.copy()
new_df_silver = new_df.copy()
new_df_platinum = new_df.copy()
new_df_palladium = new_df.copy()
# Chronological 70/30 split (no shuffling — appropriate for time series).
test_size = int(0.3 * len(new_df_gold))
train_size = len(new_df_gold) - test_size
training_x = new_df_gold[:train_size].drop('Gold_ETF',axis=1)
training_y = new_df_gold[:train_size]['Gold_ETF']
testing_x = new_df_gold[train_size:].drop(['Gold_ETF'], axis=1)
testing_y = new_df_gold[train_size:]['Gold_ETF']
# OLS baseline: Gold_ETF regressed on all remaining features, with intercept.
X2 = sm.add_constant(training_x)
est = sm.OLS(training_y, X2)
est2 = est.fit()
print(est2.summary())
C:\ProgramData\Anaconda3\lib\site-packages\statsmodels\tsa\tsatools.py:142: FutureWarning: In a future version of pandas all arguments of concat except for the argument 'objs' will be keyword-only x = pd.concat(x[::order], 1)
OLS Regression Results
==============================================================================
Dep. Variable: Gold_ETF R-squared: 0.989
Model: OLS Adj. R-squared: 0.989
Method: Least Squares F-statistic: 1723.
Date: Mon, 25 Jul 2022 Prob (F-statistic): 0.00
Time: 21:14:43 Log-Likelihood: -2207.7
No. Observations: 516 AIC: 4469.
Df Residuals: 489 BIC: 4584.
Df Model: 26
Covariance Type: nonrobust
============================================================================================
coef std err t P>|t| [0.025 0.975]
--------------------------------------------------------------------------------------------
const 876.0314 264.485 3.312 0.001 356.364 1395.698
S&P_500 -0.0277 0.011 -2.443 0.015 -0.050 -0.005
Silver 16.8527 1.093 15.421 0.000 14.705 19.000
Platinum -0.1558 0.026 -5.903 0.000 -0.208 -0.104
Palladium 0.0635 0.008 7.781 0.000 0.047 0.079
Crude_oil -0.3590 0.223 -1.611 0.108 -0.797 0.079
Euro_USD_Exchange_Rate 470.8142 70.163 6.710 0.000 332.955 608.673
EGO 1.7104 1.113 1.537 0.125 -0.476 3.896
AU 3.0267 0.714 4.238 0.000 1.624 4.430
ABX 7.6814 0.825 9.311 0.000 6.060 9.302
BVN -6.2831 1.006 -6.247 0.000 -8.259 -4.307
CPI -0.6145 1.005 -0.611 0.541 -2.589 1.360
Covid_deaths 0.0027 0.002 1.474 0.141 -0.001 0.006
Interest_rate_USA -94.0916 11.722 -8.027 0.000 -117.123 -71.060
Interest_rate_UK -6.2305 10.441 -0.597 0.551 -26.744 14.283
Disease_volitaly_tracker -0.3056 0.134 -2.282 0.023 -0.569 -0.042
Gold_price_trend 0.6120 0.117 5.242 0.000 0.383 0.841
Covid_economic_impact -0.1061 0.124 -0.853 0.394 -0.351 0.138
Stock_maket_news -0.0494 0.057 -0.865 0.387 -0.162 0.063
Stock_market_crash -0.1615 0.138 -1.170 0.242 -0.433 0.110
silver_price_trend -0.3246 0.118 -2.750 0.006 -0.557 -0.093
Palladium_price_trend 0.1585 0.200 0.792 0.428 -0.234 0.551
Platinum_price_trend -0.3373 0.168 -2.006 0.045 -0.668 -0.007
Gold_ETF_Return 458.4730 110.191 4.161 0.000 241.968 674.978
Silver_Return -79.2051 57.883 -1.368 0.172 -192.935 34.525
Platinum_Return 6.3703 52.610 0.121 0.904 -96.999 109.740
Palladium_Return -8.4088 35.944 -0.234 0.815 -79.033 62.215
==============================================================================
Omnibus: 2.659 Durbin-Watson: 0.321
Prob(Omnibus): 0.265 Jarque-Bera (JB): 2.487
Skew: 0.104 Prob(JB): 0.288
Kurtosis: 2.732 Cond. No. 1.45e+06
==============================================================================
Notes:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
[2] The condition number is large, 1.45e+06. This might indicate that there are
strong multicollinearity or other numerical problems.
# create regressor object
# Random forest on the raw (unscaled) gold training set; used here for
# feature-importance ranking (tree models are insensitive to feature scale).
regressor = RandomForestRegressor(n_estimators = 500, random_state = 0)
# fit the regressor with x and y data
regressor.fit(training_x, training_y)
pred = regressor.predict(testing_x)
importances = regressor.feature_importances_
importances
#Create a DataFrame using a Dictionary
data={'feature_names':training_x.columns,'feature_importance':importances}
fi_df = pd.DataFrame(data)
fi_df
#Sort the DataFrame in order of decreasing feature importance
fi_df.sort_values(by=['feature_importance'], ascending=False,inplace=True)
#Define size of bar plot
plt.figure(figsize=(10,8))
#Plot Seaborn bar chart of the ranked importances
sns.barplot(x=fi_df['feature_importance'], y=fi_df['feature_names'])
#Add chart labels
plt.title( 'FEATURE IMPORTANCE')
plt.xlabel('FEATURE IMPORTANCE')
plt.ylabel('FEATURE NAMES')
Text(0, 0.5, 'FEATURE NAMES')
# Drop the features ranked least important by the forest above.
col_to_remove = ['Silver_Return', 'Platinum_Return', 'Palladium_Return','Covid_economic_impact','Interest_rate_UK']
new_df_gold.drop(col_to_remove,axis=1,inplace=True)
# NOTE(review): the scaler is fit on the FULL dataset before the train/test
# split below, so test-period min/max leak into the training features.
# Consider fitting on the training slice only — confirm intent.
scaler = MinMaxScaler()
feature_minmax_transform_data = scaler.fit_transform(new_df_gold)
feature_minmax_transform = pd.DataFrame(columns=new_df_gold.columns, data=feature_minmax_transform_data, index=new_df_gold.index)
feature_minmax_transform.head()
| Gold_ETF | S&P_500 | Silver | Platinum | Palladium | Crude_oil | Euro_USD_Exchange_Rate | EGO | AU | ABX | BVN | CPI | Covid_deaths | Interest_rate_USA | Disease_volitaly_tracker | Gold_price_trend | Stock_maket_news | Stock_market_crash | silver_price_trend | Palladium_price_trend | Platinum_price_trend | Gold_ETF_Return | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| Date | ||||||||||||||||||||||
| 2019-06-24 | 0.026878 | 0.276961 | 0.205684 | 0.311357 | 0.076525 | 0.592140 | 0.583079 | 0.036957 | 0.150886 | 0.046521 | 0.943730 | 0.0 | 0.059689 | 0.733696 | 0.026035 | 0.155844 | 0.49 | 0.0 | 0.010753 | 0.10989 | 0.213115 | 0.574306 |
| 2019-06-25 | 0.027795 | 0.266018 | 0.201325 | 0.308622 | 0.077408 | 0.591706 | 0.589621 | 0.032609 | 0.130999 | 0.030190 | 0.918152 | 0.0 | 0.059689 | 0.744565 | 0.000000 | 0.155844 | 0.49 | 0.0 | 0.010753 | 0.10989 | 0.213115 | 0.459543 |
| 2019-06-26 | 0.022755 | 0.264610 | 0.200872 | 0.318411 | 0.075013 | 0.601314 | 0.576537 | 0.059783 | 0.149157 | 0.016827 | 0.904511 | 0.0 | 0.059689 | 0.750000 | 0.000000 | 0.155844 | 0.49 | 0.0 | 0.010753 | 0.10989 | 0.213115 | 0.434314 |
| 2019-06-27 | 0.017868 | 0.268968 | 0.196456 | 0.311933 | 0.083964 | 0.601624 | 0.578718 | 0.065217 | 0.155210 | 0.005939 | 0.897690 | 0.0 | 0.059689 | 0.733696 | 0.008044 | 0.155844 | 0.49 | 0.0 | 0.010753 | 0.10989 | 0.213115 | 0.434913 |
| 2019-06-28 | 0.019853 | 0.275556 | 0.199173 | 0.344897 | 0.084090 | 0.595673 | 0.577846 | 0.072826 | 0.153048 | 0.009898 | 0.908774 | 0.0 | 0.059689 | 0.739130 | 0.000000 | 0.155844 | 0.49 | 0.0 | 0.010753 | 0.10989 | 0.213115 | 0.464108 |
transformed_df = feature_minmax_transform
# Same chronological 70/30 split and OLS baseline as before, now on the
# scaled, reduced feature set (note the much smaller condition number in the output).
test_size = int(0.3 * len(transformed_df))
train_size = len(transformed_df) - test_size
training_x = transformed_df[:train_size].drop('Gold_ETF',axis=1)
training_y = transformed_df[:train_size]['Gold_ETF']
testing_x = transformed_df[train_size:].drop(['Gold_ETF'], axis=1)
testing_y = transformed_df[train_size:]['Gold_ETF']
X2 = sm.add_constant(training_x)
est = sm.OLS(training_y, X2)
est2 = est.fit()
print(est2.summary())
OLS Regression Results
==============================================================================
Dep. Variable: Gold_ETF R-squared: 0.989
Model: OLS Adj. R-squared: 0.989
Method: Least Squares F-statistic: 2137.
Date: Mon, 25 Jul 2022 Prob (F-statistic): 0.00
Time: 21:14:46 Log-Likelihood: 1136.0
No. Observations: 516 AIC: -2228.
Df Residuals: 494 BIC: -2135.
Df Model: 21
Covariance Type: nonrobust
============================================================================================
coef std err t P>|t| [0.025 0.975]
--------------------------------------------------------------------------------------------
const 0.2180 0.032 6.885 0.000 0.156 0.280
S&P_500 -0.0957 0.042 -2.277 0.023 -0.178 -0.013
Silver 0.4487 0.028 15.772 0.000 0.393 0.505
Platinum -0.1599 0.027 -5.986 0.000 -0.212 -0.107
Palladium 0.1468 0.019 7.843 0.000 0.110 0.184
Crude_oil -0.0783 0.053 -1.479 0.140 -0.182 0.026
Euro_USD_Exchange_Rate 0.1650 0.023 7.064 0.000 0.119 0.211
EGO 0.0286 0.014 2.056 0.040 0.001 0.056
AU 0.1056 0.025 4.294 0.000 0.057 0.154
ABX 0.2126 0.022 9.618 0.000 0.169 0.256
BVN -0.1030 0.016 -6.338 0.000 -0.135 -0.071
CPI -0.0532 0.058 -0.912 0.362 -0.168 0.061
Covid_deaths 0.0134 0.012 1.098 0.273 -0.011 0.037
Interest_rate_USA -0.2887 0.025 -11.509 0.000 -0.338 -0.239
Disease_volitaly_tracker -0.0391 0.013 -2.933 0.004 -0.065 -0.013
Gold_price_trend 0.0705 0.014 5.186 0.000 0.044 0.097
Stock_maket_news -0.0037 0.008 -0.439 0.661 -0.020 0.013
Stock_market_crash -0.0190 0.020 -0.973 0.331 -0.057 0.019
silver_price_trend -0.0486 0.017 -2.931 0.004 -0.081 -0.016
Palladium_price_trend 0.0234 0.027 0.852 0.395 -0.031 0.077
Platinum_price_trend -0.0348 0.015 -2.318 0.021 -0.064 -0.005
Gold_ETF_Return 0.0540 0.012 4.525 0.000 0.031 0.078
==============================================================================
Omnibus: 3.755 Durbin-Watson: 0.342
Prob(Omnibus): 0.153 Jarque-Bera (JB): 3.243
Skew: 0.109 Prob(JB): 0.198
Kurtosis: 2.679 Cond. No. 121.
==============================================================================
Notes:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
C:\ProgramData\Anaconda3\lib\site-packages\statsmodels\tsa\tsatools.py:142: FutureWarning: In a future version of pandas all arguments of concat except for the argument 'objs' will be keyword-only x = pd.concat(x[::order], 1)
# create regressor object
# Random forest on the scaled, reduced feature set; used for importance ranking.
regressor = RandomForestRegressor(n_estimators = 500, random_state = 0)
# fit the regressor with x and y data
regressor.fit(training_x, training_y)
pred = regressor.predict(testing_x)
importances = regressor.feature_importances_
importances
#Create a DataFrame using a Dictionary
data={'feature_names':training_x.columns,'feature_importance':importances}
fi_df = pd.DataFrame(data)
fi_df
#Sort the DataFrame in order of decreasing feature importance
fi_df.sort_values(by=['feature_importance'], ascending=False,inplace=True)
#Define size of bar plot
plt.figure(figsize=(10,8))
#Plot Seaborn bar chart of the top-10 features.
# FIX: the original passed the FULL importance vector as x but only the first
# 10 names as y ([:10]) — mismatched lengths misalign/fail in seaborn.
# Slice both consistently to the top 10.
top10 = fi_df.head(10)
sns.barplot(x=top10['feature_importance'], y=top10['feature_names'])
#Add chart labels
plt.title( 'FEATURE IMPORTANCE')
plt.xlabel('FEATURE IMPORTANCE')
plt.ylabel('FEATURE NAMES')
Text(0, 0.5, 'FEATURE NAMES')
# Build a plain Date Series (recovered from the index) and a float matrix
# of the scaled features for the recurrent-network pipeline.
date_df = pd.DataFrame(transformed_df["Gold_ETF"])
date_df['Date'] = date_df.index
date_df.drop(columns="Gold_ETF",inplace=True)
date_df= date_df.squeeze()
float_data = np.array(transformed_df.values.tolist())
# Train-Test Split
nn_metal_data = float_data
split_percent = 0.70
split = int(split_percent*len(nn_metal_data))
# NOTE(review): `split` is computed but never used — train and test below are
# BOTH the full dataset, so the network is later evaluated on data it was
# trained on. Presumably nn_metal_data[:split] / nn_metal_data[split:] was
# intended; confirm before changing, since downstream lengths (e.g. the 734
# used with forecast_accuracy) depend on the full-data sizes.
nn_metal_train = nn_metal_data
nn_metal_test = nn_metal_data
date_train = date_df
date_test = date_df
print(len(nn_metal_train))
print(len(nn_metal_test))
737 737
look_back =3 #lags: each sample is the previous 3 days of all features
numberOfCols = len(transformed_df.columns)
#train and test generator
# Data and targets are the same array, so each sample's target is the FULL
# next-day feature vector (the model predicts all columns, not just Gold_ETF).
train_generator = TimeseriesGenerator(nn_metal_train, nn_metal_train, length=look_back, batch_size=1)
test_generator = TimeseriesGenerator(nn_metal_test, nn_metal_test, length=look_back, batch_size=1)
# Stacked recurrent model: LSTM -> GRU -> GRU -> LSTM -> Dense, predicting the
# full next-day feature vector (numberOfCols outputs) from look_back lagged days.
model = Sequential()
model.add(
    LSTM(32,
         activation='relu',
         input_shape=(look_back, numberOfCols),
         return_sequences=True)
)
# input_shape is only meaningful on the first layer of a Sequential model;
# removed from the remaining layers (it was silently ignored there).
model.add(
    layers.GRU(64,
               activation='relu',
               activity_regularizer=regularizers.l2(0.01),
               return_sequences=True)
)
model.add(
    layers.GRU(32,
               activation='relu',
               return_sequences=True)
)
model.add(layers.LSTM(80, activation='relu'))
model.add(Dense(numberOfCols))
model.compile(optimizer='adam', loss='mse')
num_epochs = 25
# FIX: Model.fit_generator is deprecated (see the UserWarning this cell
# emitted); Model.fit accepts generators directly and is the supported API.
model.fit(train_generator, epochs=num_epochs, verbose=1)
C:\Users\LOCAL_~1\Temp/ipykernel_10088/1287865284.py:31: UserWarning: `Model.fit_generator` is deprecated and will be removed in a future version. Please use `Model.fit`, which supports generators. model.fit_generator(train_generator, epochs=num_epochs, verbose=1)
Epoch 1/25 734/734 [==============================] - 8s 5ms/step - loss: 0.0381 Epoch 2/25 734/734 [==============================] - 4s 5ms/step - loss: 0.0174 Epoch 3/25 734/734 [==============================] - 4s 6ms/step - loss: 0.0122 Epoch 4/25 734/734 [==============================] - 5s 6ms/step - loss: 0.0107 Epoch 5/25 734/734 [==============================] - 4s 6ms/step - loss: 0.0095 Epoch 6/25 734/734 [==============================] - 4s 6ms/step - loss: 0.0091 Epoch 7/25 734/734 [==============================] - 4s 6ms/step - loss: 0.0083A: 0s - los Epoch 8/25 734/734 [==============================] - 4s 6ms/step - loss: 0.0079 Epoch 9/25 734/734 [==============================] - 4s 5ms/step - loss: 0.0076 Epoch 10/25 734/734 [==============================] - 4s 5ms/step - loss: 0.0070 Epoch 11/25 734/734 [==============================] - 4s 5ms/step - loss: 0.0068 Epoch 12/25 734/734 [==============================] - 4s 5ms/step - loss: 0.0067 Epoch 13/25 734/734 [==============================] - 4s 5ms/step - loss: 0.0064 Epoch 14/25 734/734 [==============================] - 4s 6ms/step - loss: 0.0062 Epoch 15/25 734/734 [==============================] - 4s 5ms/step - loss: 0.0060 Epoch 16/25 734/734 [==============================] - 4s 5ms/step - loss: 0.0059 Epoch 17/25 734/734 [==============================] - 4s 5ms/step - loss: 0.0058 Epoch 18/25 734/734 [==============================] - 4s 5ms/step - loss: 0.0057 Epoch 19/25 734/734 [==============================] - 4s 5ms/step - loss: 0.0057 Epoch 20/25 734/734 [==============================] - 4s 5ms/step - loss: 0.0054 Epoch 21/25 734/734 [==============================] - 4s 5ms/step - loss: 0.0056 Epoch 22/25 734/734 [==============================] - 4s 5ms/step - loss: 0.0052 Epoch 23/25 734/734 [==============================] - 4s 5ms/step - loss: 0.0053 Epoch 24/25 734/734 [==============================] - 4s 5ms/step - loss: 0.0053 Epoch 25/25 734/734 
[==============================] - 4s 5ms/step - loss: 0.0052
<keras.callbacks.History at 0x21652908e80>
# Back-transform scaled arrays to original price units for plotting/metrics.
nn_metal_train = scaler.inverse_transform(nn_metal_train)
nn_metal_test = scaler.inverse_transform(nn_metal_test)
prediction = model.predict(test_generator)
# Flatten row-major, then stride by numberOfCols to keep only column 0 of
# each row — assumes Gold_ETF is the first column of transformed_df (it is,
# per the table output above); verify if column order ever changes.
nn_metal_train = nn_metal_train.reshape((-1))
nn_metal_train = nn_metal_train[::numberOfCols]
nn_metal_test = nn_metal_test.reshape((-1))
nn_metal_test = nn_metal_test[::numberOfCols]
prediction = scaler.inverse_transform(prediction)
prediction = prediction.reshape((-1))
prediction = prediction[::numberOfCols]
# Plotly overlay of the training series, model predictions and actuals
# (all in original price units after inverse_transform).
trace1 = go.Scatter(
    x = date_train,
    y = nn_metal_train,
    mode = 'lines',
    name = 'Data'
)
trace2 = go.Scatter(
    x = date_test,
    y = prediction,
    mode = 'lines',
    name = 'Predicted Value'
)
trace3 = go.Scatter(
    x = date_test,
    y = nn_metal_test,
    mode='lines',
    name = 'Actual values'
)
layout = go.Layout(
    title = "Price Predictions",
    xaxis = {'title' : "Date"},
    yaxis = {'title' : "Value"}
)
fig = go.Figure(data=[trace1, trace2, trace3], layout=layout)
fig.show()
# FIX: TimeseriesGenerator(length=look_back) produces predictions for targets
# starting at index look_back, so the aligned actuals are
# nn_metal_test[look_back:] (737 - 3 = 734 points). The original compared
# against the FIRST 734 rows, shifting actuals 3 days relative to predictions.
forecast_accuracy(prediction, nn_metal_test[look_back:])
{'me': 5.428889020457253,
'mae': 26.64802006609759,
'rmse': 34.31694910105655,
'mse': 1177.652995604506}
# Restore the 2-D (rows x features) shape for recursive forecasting.
nn_metal_data = nn_metal_data.reshape((-1,numberOfCols))
def predict(num_prediction, model):
    """Recursively forecast `num_prediction` future days.

    Each step feeds the last `look_back` (scaled) rows to the model and
    appends the predicted full feature vector to the history.  Returns the
    first column (Gold_ETF — assumes it is column 0) of the inverse-scaled
    seed rows plus forecasts, as a flat array.
    """
    prediction_list = nn_metal_data[-look_back:]
    for _ in range(num_prediction):
        prediction_list = prediction_list.reshape((-1,numberOfCols))
        x = prediction_list[-look_back:]
        x = x.reshape((1,look_back, numberOfCols))
        # FIX: run the network once per step. The original called
        # model.predict(x) numberOfCols+1 times per iteration (once into an
        # unused variable, then once per appended column) for the same x;
        # appending the whole output vector gives the identical result.
        next_row = model.predict(x)[0]
        prediction_list = np.append(prediction_list, next_row)
    prediction_list = scaler.inverse_transform(prediction_list.reshape((-1,numberOfCols)))
    prediction_list = prediction_list.reshape((-1))
    prediction_list = prediction_list[::numberOfCols]
    return prediction_list
def predict_dates(num_prediction):
    """Daily date range covering the forecast horizon, anchored in df['Date']."""
    # NOTE(review): values[-2] anchors at the SECOND-to-last observed date —
    # presumably [-1] was intended; confirm against how forecast[1:] is
    # aligned with these dates before changing.
    last_date = df['Date'].values[-2]
    prediction_dates = pd.date_range(last_date, periods=num_prediction+1, freq='D').tolist()
    return prediction_dates
# 10-day recursive forecast plus matching calendar dates.
num_prediction = 10
forecast = predict(num_prediction, model)
forecast_dates = predict_dates(num_prediction)
# Drop the first returned point so the series pairs with the date range above.
forecast = forecast[1:]
# Back-transform the full history to price units and keep the Gold_ETF column
# (column 0) via the same stride trick used earlier.
nn_metal_data = scaler.inverse_transform(nn_metal_data)
nn_metal_data = nn_metal_data.reshape((-1))
nn_metal_data = nn_metal_data[::numberOfCols]
forecast = forecast.reshape((-1))
#forecast_list1 = forecast[:5]
# Plot the historical target series alongside the 10-day forecast.
history_trace = go.Scatter(x=df['Date'], y=nn_metal_data,
                           mode='lines', name='Data')
future_trace = go.Scatter(x=forecast_dates, y=forecast,
                          mode='lines', name='forecast')
fig = go.Figure(
    data=[history_trace, future_trace],
    layout=go.Layout(title="Forecasting Price",
                     xaxis={'title': "Date"},
                     yaxis={'title': "Value"}),
)
fig.show()
# Chronological 70/30 train/test split for the Silver target (no shuffling,
# as appropriate for a time series).
test_size = int(0.3 * len(new_df_silver))
train_size = len(new_df_silver) - test_size
training_x = new_df_silver[:train_size].drop('Silver',axis=1)
training_y = new_df_silver[:train_size]['Silver']
testing_x = new_df_silver[train_size:].drop(['Silver'], axis=1)
testing_y = new_df_silver[train_size:]['Silver']
# OLS with an intercept, used to screen regressors by significance.
X2 = sm.add_constant(training_x)
est = sm.OLS(training_y, X2)
est2 = est.fit()
print(est2.summary())
OLS Regression Results
==============================================================================
Dep. Variable: Silver R-squared: 0.984
Model: OLS Adj. R-squared: 0.983
Method: Least Squares F-statistic: 1124.
Date: Mon, 25 Jul 2022 Prob (F-statistic): 0.00
Time: 21:16:49 Log-Likelihood: -462.05
No. Observations: 516 AIC: 978.1
Df Residuals: 489 BIC: 1093.
Df Model: 26
Covariance Type: nonrobust
============================================================================================
coef std err t P>|t| [0.025 0.975]
--------------------------------------------------------------------------------------------
const -91.0605 8.089 -11.257 0.000 -106.955 -75.167
Gold_ETF 0.0194 0.001 15.421 0.000 0.017 0.022
S&P_500 3.385e-05 0.000 0.087 0.930 -0.001 0.001
Platinum 0.0137 0.001 19.966 0.000 0.012 0.015
Palladium -0.0019 0.000 -6.745 0.000 -0.002 -0.001
Crude_oil 0.0073 0.008 0.965 0.335 -0.008 0.022
Euro_USD_Exchange_Rate 14.3797 2.402 5.986 0.000 9.660 19.100
EGO -0.0085 0.038 -0.223 0.823 -0.083 0.066
AU -0.1492 0.024 -6.287 0.000 -0.196 -0.103
ABX 0.0560 0.030 1.851 0.065 -0.003 0.116
BVN 0.3979 0.031 13.016 0.000 0.338 0.458
CPI 0.1961 0.033 5.950 0.000 0.131 0.261
Covid_deaths -0.0003 5.96e-05 -5.807 0.000 -0.000 -0.000
Interest_rate_USA 0.0493 0.423 0.117 0.907 -0.782 0.881
Interest_rate_UK -0.9210 0.352 -2.616 0.009 -1.613 -0.229
Disease_volitaly_tracker 0.0035 0.005 0.768 0.443 -0.005 0.012
Gold_price_trend -0.0031 0.004 -0.770 0.441 -0.011 0.005
Covid_economic_impact 0.0137 0.004 3.279 0.001 0.005 0.022
Stock_maket_news 0.0026 0.002 1.320 0.187 -0.001 0.006
Stock_market_crash -0.0008 0.005 -0.170 0.865 -0.010 0.008
silver_price_trend 0.0256 0.004 6.616 0.000 0.018 0.033
Palladium_price_trend -0.0100 0.007 -1.472 0.142 -0.023 0.003
Platinum_price_trend -0.0151 0.006 -2.658 0.008 -0.026 -0.004
Gold_ETF_Return -13.7449 3.755 -3.661 0.000 -21.122 -6.368
Silver_Return 6.8719 1.944 3.535 0.000 3.053 10.691
Platinum_Return -4.5686 1.774 -2.576 0.010 -8.054 -1.084
Palladium_Return 2.0314 1.217 1.670 0.096 -0.359 4.422
==============================================================================
Omnibus: 1.816 Durbin-Watson: 0.289
Prob(Omnibus): 0.403 Jarque-Bera (JB): 1.707
Skew: 0.048 Prob(JB): 0.426
Kurtosis: 3.265 Cond. No. 1.40e+06
==============================================================================
Notes:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
[2] The condition number is large, 1.4e+06. This might indicate that there are
strong multicollinearity or other numerical problems.
C:\ProgramData\Anaconda3\lib\site-packages\statsmodels\tsa\tsatools.py:142: FutureWarning: In a future version of pandas all arguments of concat except for the argument 'objs' will be keyword-only
# Fit a random forest to rank feature importance for the Silver target.
regressor = RandomForestRegressor(n_estimators=500, random_state=0)
# fit the regressor with x and y data
regressor.fit(training_x, training_y)
pred = regressor.predict(testing_x)
importances = regressor.feature_importances_
importances
# Create a DataFrame using a Dictionary
data = {'feature_names': training_x.columns, 'feature_importance': importances}
fi_df = pd.DataFrame(data)
fi_df
# Sort the DataFrame in order decreasing feature importance
fi_df.sort_values(by=['feature_importance'], ascending=False, inplace=True)
# Define size of bar plot
plt.figure(figsize=(10, 8))
# BUG FIX: the original passed ALL importances as x but only the top-10
# names as y, so the vectors had different lengths and the bars/labels
# were misaligned; slice both consistently to the top 10 features.
top_features = fi_df.head(10)
sns.barplot(x=top_features['feature_importance'], y=top_features['feature_names'])
# Add chart labels
plt.title('FEATURE IMPORTANCE')
plt.xlabel('FEATURE IMPORTANCE')
plt.ylabel('FEATURE NAMES')
Text(0, 0.5, 'FEATURE NAMES')
# Drop features found insignificant (OLS) or unimportant (random forest).
col_to_remove = ['EGO','Interest_rate_USA','Palladium_price_trend','Gold_price_trend']
new_df_silver.drop(col_to_remove,axis=1,inplace=True)
# Min-max scale every remaining column to [0, 1] for the neural network.
scaler = MinMaxScaler()
feature_minmax_transform_data = scaler.fit_transform(new_df_silver)
feature_minmax_transform = pd.DataFrame(columns=new_df_silver.columns, data=feature_minmax_transform_data, index=new_df_silver.index)
feature_minmax_transform.head()
transformed_df = feature_minmax_transform
# Re-run the chronological split and OLS on the scaled, reduced feature set.
test_size = int(0.3 * len(transformed_df))
train_size = len(transformed_df) - test_size
training_x = transformed_df[:train_size].drop('Silver',axis=1)
training_y = transformed_df[:train_size]['Silver']
testing_x = transformed_df[train_size:].drop(['Silver'], axis=1)
testing_y = transformed_df[train_size:]['Silver']
X2 = sm.add_constant(training_x)
est = sm.OLS(training_y, X2)
est2 = est.fit()
print(est2.summary())
OLS Regression Results
==============================================================================
Dep. Variable: Silver R-squared: 0.983
Model: OLS Adj. R-squared: 0.983
Method: Least Squares F-statistic: 1329.
Date: Mon, 25 Jul 2022 Prob (F-statistic): 0.00
Time: 21:16:52 Log-Likelihood: 1017.8
No. Observations: 516 AIC: -1990.
Df Residuals: 493 BIC: -1892.
Df Model: 22
Covariance Type: nonrobust
============================================================================================
coef std err t P>|t| [0.025 0.975]
--------------------------------------------------------------------------------------------
const -0.2636 0.037 -7.068 0.000 -0.337 -0.190
Gold_ETF 0.7018 0.041 17.275 0.000 0.622 0.782
S&P_500 0.0171 0.055 0.312 0.755 -0.090 0.124
Platinum 0.5492 0.025 22.133 0.000 0.500 0.598
Palladium -0.1934 0.021 -9.421 0.000 -0.234 -0.153
Crude_oil 0.0509 0.068 0.753 0.452 -0.082 0.184
Euro_USD_Exchange_Rate 0.1972 0.030 6.537 0.000 0.138 0.257
AU -0.2015 0.030 -6.778 0.000 -0.260 -0.143
ABX 0.0701 0.030 2.302 0.022 0.010 0.130
BVN 0.2588 0.018 14.537 0.000 0.224 0.294
CPI 0.4657 0.069 6.731 0.000 0.330 0.602
Covid_deaths -0.0906 0.014 -6.375 0.000 -0.118 -0.063
Interest_rate_UK -0.1563 0.041 -3.808 0.000 -0.237 -0.076
Disease_volitaly_tracker 0.0142 0.017 0.819 0.413 -0.020 0.048
Covid_economic_impact 0.0783 0.023 3.372 0.001 0.033 0.124
Stock_maket_news 0.0158 0.011 1.445 0.149 -0.006 0.037
Stock_market_crash -0.0291 0.021 -1.387 0.166 -0.070 0.012
silver_price_trend 0.1218 0.019 6.483 0.000 0.085 0.159
Platinum_price_trend -0.0685 0.017 -3.977 0.000 -0.102 -0.035
Gold_ETF_Return -0.0873 0.023 -3.775 0.000 -0.133 -0.042
Silver_Return 0.1100 0.030 3.646 0.000 0.051 0.169
Platinum_Return -0.0616 0.023 -2.636 0.009 -0.107 -0.016
Palladium_Return 0.0581 0.032 1.836 0.067 -0.004 0.120
==============================================================================
Omnibus: 2.146 Durbin-Watson: 0.288
Prob(Omnibus): 0.342 Jarque-Bera (JB): 2.003
Skew: 0.087 Prob(JB): 0.367
Kurtosis: 3.251 Cond. No. 120.
==============================================================================
Notes:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
C:\ProgramData\Anaconda3\lib\site-packages\statsmodels\tsa\tsatools.py:142: FutureWarning: In a future version of pandas all arguments of concat except for the argument 'objs' will be keyword-only
# Sanity check: number of columns in the unreduced frame (27 in the run).
len(new_df.columns)
27
# Extract the DatetimeIndex as a Series to use as the plot x-axis.
date_df = pd.DataFrame(transformed_df["Silver"])
date_df['Date'] = date_df.index
date_df.drop(columns="Silver",inplace=True)
date_df= date_df.squeeze()
float_data = np.array(transformed_df.values.tolist())
# Train-Test Split
# NOTE(review): `split` is computed but never used — train and test are
# both the FULL dataset, so the LSTM below is evaluated on data it was
# trained on. Confirm this is intentional.
nn_metal_data = float_data
split_percent = 0.70
split = int(split_percent*len(nn_metal_data))
nn_metal_train = nn_metal_data
nn_metal_test = nn_metal_data
date_train = date_df
date_test = date_df
print(len(nn_metal_train))
print(len(nn_metal_test))
737 737
look_back =3 #lags
numberOfCols = len(transformed_df.columns)
#train and test generator
# Each sample: look_back consecutive feature rows -> the next full row
# (targets == inputs, so the network predicts every column one step ahead).
train_generator = TimeseriesGenerator(nn_metal_train, nn_metal_train, length=look_back, batch_size=1)
test_generator = TimeseriesGenerator(nn_metal_test, nn_metal_test, length=look_back, batch_size=1)
# Stacked LSTM/GRU network mapping look_back rows of all features to the
# next full feature row (numberOfCols outputs), trained with MSE/Adam.
model = Sequential()
model.add(
    LSTM(32,
         activation='relu',
         input_shape=(look_back, numberOfCols),
         return_sequences=True)
)
# input_shape on the layers below was ignored by Keras (only the first
# layer's matters), so it is dropped here for clarity.
model.add(
    layers.GRU(64,
               activation='relu',
               activity_regularizer=regularizers.l2(0.01),
               return_sequences=True)
)
model.add(
    layers.GRU(32,
               activation='relu',
               return_sequences=True)
)
model.add(
    layers.LSTM(80,
                activation='relu')
)
model.add(Dense(numberOfCols))
model.compile(optimizer='adam', loss='mse')
num_epochs = 25
# FIX: Model.fit_generator is deprecated (it raised a UserWarning in the
# original run); Model.fit accepts generators directly.
model.fit(train_generator, epochs=num_epochs, verbose=1)
C:\Users\LOCAL_~1\Temp/ipykernel_10088/1287865284.py:31: UserWarning: `Model.fit_generator` is deprecated and will be removed in a future version. Please use `Model.fit`, which supports generators.
Epoch 1/25 734/734 [==============================] - 8s 5ms/step - loss: 0.0336 Epoch 2/25 734/734 [==============================] - 4s 6ms/step - loss: 0.0151 Epoch 3/25 734/734 [==============================] - 4s 5ms/step - loss: 0.0128 Epoch 4/25 734/734 [==============================] - 4s 5ms/step - loss: 0.0110 Epoch 5/25 734/734 [==============================] - 4s 5ms/step - loss: 0.0096 Epoch 6/25 734/734 [==============================] - 4s 5ms/step - loss: 0.0086 Epoch 7/25 734/734 [==============================] - 4s 5ms/step - loss: 0.0081 Epoch 8/25 734/734 [==============================] - 4s 5ms/step - loss: 0.0076 Epoch 9/25 734/734 [==============================] - 4s 5ms/step - loss: 0.0074 Epoch 10/25 734/734 [==============================] - 4s 5ms/step - loss: 0.0073 Epoch 11/25 734/734 [==============================] - 4s 5ms/step - loss: 0.0068 Epoch 12/25 734/734 [==============================] - 4s 5ms/step - loss: 0.0067 Epoch 13/25 734/734 [==============================] - 4s 6ms/step - loss: 0.0065 Epoch 14/25 734/734 [==============================] - 4s 6ms/step - loss: 0.0065 Epoch 15/25 734/734 [==============================] - 4s 5ms/step - loss: 0.0062 Epoch 16/25 734/734 [==============================] - 4s 5ms/step - loss: 0.0064 Epoch 17/25 734/734 [==============================] - 4s 6ms/step - loss: 0.0061 Epoch 18/25 734/734 [==============================] - 4s 5ms/step - loss: 0.0057 Epoch 19/25 734/734 [==============================] - 4s 5ms/step - loss: 0.0058 Epoch 20/25 734/734 [==============================] - 4s 5ms/step - loss: 0.0057 Epoch 21/25 734/734 [==============================] - 4s 5ms/step - loss: 0.0056 Epoch 22/25 734/734 [==============================] - 4s 5ms/step - loss: 0.0056 Epoch 23/25 734/734 [==============================] - 4s 5ms/step - loss: 0.0054 Epoch 24/25 734/734 [==============================] - 4s 5ms/step - loss: 0.0054 Epoch 25/25 734/734 
[==============================] - 4s 6ms/step - loss: 0.0053
<keras.callbacks.History at 0x2165ec5f700>
# Un-scale train/test arrays and predictions, then flatten and keep every
# numberOfCols-th value starting at offset 2 — presumably the Silver
# column's position in transformed_df (TODO confirm against column order).
nn_metal_train = scaler.inverse_transform(nn_metal_train)
nn_metal_test = scaler.inverse_transform(nn_metal_test)
prediction = model.predict(test_generator)
nn_metal_train = nn_metal_train.reshape((-1))
nn_metal_train = nn_metal_train[2::numberOfCols]
nn_metal_test = nn_metal_test.reshape((-1))
nn_metal_test = nn_metal_test[2::numberOfCols]
prediction = scaler.inverse_transform(prediction)
prediction = prediction.reshape((-1))
prediction = prediction[2::numberOfCols]
# Overlay the training series, the model's test-set predictions, and the
# actual test values on a single interactive chart, then report accuracy.
data_trace = go.Scatter(x=date_train, y=nn_metal_train,
                        mode='lines', name='Data')
pred_trace = go.Scatter(x=date_test, y=prediction,
                        mode='lines', name='Predicted Value')
actual_trace = go.Scatter(x=date_test, y=nn_metal_test,
                          mode='lines', name='Actual values')
fig = go.Figure(
    data=[data_trace, pred_trace, actual_trace],
    layout=go.Layout(title="Price Predictions",
                     xaxis={'title': "Date"},
                     yaxis={'title': "Value"}),
)
fig.show()
forecast_accuracy(prediction, nn_metal_test[:734])
{'me': -0.27585079067344564,
'mae': 0.6882383668052403,
'rmse': 0.9010776606976739,
'mse': 0.8119409506083924}
# Restore the 2-D (rows, features) layout before iterative forecasting.
nn_metal_data = nn_metal_data.reshape((-1,numberOfCols))
def predict(num_prediction, model):
    """Iteratively forecast `num_prediction` future Silver steps.

    Seeds with the last `look_back` rows of the scaled global
    `nn_metal_data`, feeds the model its own multi-column output at each
    step, then inverse-transforms and extracts column offset 2 (presumably
    Silver's position — TODO confirm against transformed_df column order).

    Parameters
    ----------
    num_prediction : int
        Number of future steps to generate.
    model : keras.Model
        Trained network that outputs one scaled row of `numberOfCols` values.

    Returns
    -------
    numpy.ndarray
        1-D forecast; the first element is the seed value (callers drop it
        via `forecast[1:]`).
    """
    prediction_list = nn_metal_data[-look_back:]
    for _ in range(num_prediction):
        prediction_list = prediction_list.reshape((-1, numberOfCols))
        window = prediction_list[-look_back:].reshape((1, look_back, numberOfCols))
        # FIX: the original called model.predict(window) once per output
        # column (plus one unused call); one call per step yields the same
        # appended values with numberOfCols-fold fewer predictions.
        next_row = model.predict(window)[0]
        prediction_list = np.append(prediction_list, next_row)
    prediction_list = scaler.inverse_transform(prediction_list.reshape((-1, numberOfCols)))
    prediction_list = prediction_list.reshape((-1))
    prediction_list = prediction_list[2::numberOfCols]
    return prediction_list
def predict_dates(num_prediction):
    """Build a daily date axis of num_prediction + 1 entries for the forecast.

    NOTE(review): this starts from the SECOND-to-last observed date
    (`values[-2]`), so the forecast axis overlaps the observed series by
    one day — confirm this is intentional rather than `values[-1]`.
    """
    start = df['Date'].values[-2]
    return pd.date_range(start, periods=num_prediction + 1, freq='D').tolist()
# Generate a 10-step Silver forecast and prepare both series for plotting.
num_prediction = 10
forecast = predict(num_prediction, model)
forecast_dates = predict_dates(num_prediction)
# Drop the seed value so the forecast contains only future steps.
forecast = forecast[1:]
# Un-scale the full dataset and extract the target column (offset 2).
nn_metal_data = scaler.inverse_transform(nn_metal_data)
nn_metal_data = nn_metal_data.reshape((-1))
nn_metal_data = nn_metal_data[2::numberOfCols]
forecast = forecast.reshape((-1))
#forecast_list1 = forecast[:5]
# Plot the historical Silver series alongside the 10-day forecast.
history_trace = go.Scatter(x=df['Date'], y=nn_metal_data,
                           mode='lines', name='Data')
future_trace = go.Scatter(x=forecast_dates, y=forecast,
                          mode='lines', name='forecast')
fig = go.Figure(
    data=[history_trace, future_trace],
    layout=go.Layout(title="Forecasting Price",
                     xaxis={'title': "Date"},
                     yaxis={'title': "Value"}),
)
fig.show()
# Chronological 70/30 train/test split for the Platinum target.
test_size = int(0.3 * len(new_df_platinum))
train_size = len(new_df_platinum) - test_size
training_x = new_df_platinum[:train_size].drop('Platinum',axis=1)
training_y = new_df_platinum[:train_size]['Platinum']
testing_x = new_df_platinum[train_size:].drop(['Platinum'], axis=1)
testing_y = new_df_platinum[train_size:]['Platinum']
# OLS with an intercept, used to screen regressors by significance.
X2 = sm.add_constant(training_x)
est = sm.OLS(training_y, X2)
est2 = est.fit()
print(est2.summary())
OLS Regression Results
==============================================================================
Dep. Variable: Platinum R-squared: 0.957
Model: OLS Adj. R-squared: 0.955
Method: Least Squares F-statistic: 419.4
Date: Mon, 25 Jul 2022 Prob (F-statistic): 3.81e-315
Time: 21:18:46 Log-Likelihood: -2467.7
No. Observations: 516 AIC: 4989.
Df Residuals: 489 BIC: 5104.
Df Model: 26
Covariance Type: nonrobust
============================================================================================
coef std err t P>|t| [0.025 0.975]
--------------------------------------------------------------------------------------------
const 3930.7932 405.307 9.698 0.000 3134.435 4727.151
Gold_ETF -0.4268 0.072 -5.903 0.000 -0.569 -0.285
S&P_500 0.0744 0.019 4.001 0.000 0.038 0.111
Silver 32.6746 1.637 19.966 0.000 29.459 35.890
Palladium 0.1447 0.013 11.374 0.000 0.120 0.170
Crude_oil 0.4437 0.369 1.202 0.230 -0.282 1.169
Euro_USD_Exchange_Rate -563.7905 118.631 -4.752 0.000 -796.881 -330.700
EGO 3.9646 1.837 2.158 0.031 0.355 7.574
AU 11.3305 1.089 10.407 0.000 9.191 13.470
ABX -9.4887 1.418 -6.692 0.000 -12.275 -6.703
BVN -16.0734 1.569 -10.242 0.000 -19.157 -12.990
CPI -10.8402 1.590 -6.818 0.000 -13.964 -7.716
Covid_deaths 0.0232 0.003 8.253 0.000 0.018 0.029
Interest_rate_USA 99.3033 20.142 4.930 0.000 59.728 138.879
Interest_rate_UK 39.1819 17.193 2.279 0.023 5.401 72.963
Disease_volitaly_tracker -0.0269 0.223 -0.121 0.904 -0.465 0.411
Gold_price_trend -0.4207 0.198 -2.129 0.034 -0.809 -0.032
Covid_economic_impact -0.9563 0.201 -4.747 0.000 -1.352 -0.561
Stock_maket_news -0.3250 0.093 -3.478 0.001 -0.509 -0.141
Stock_market_crash 0.5200 0.227 2.286 0.023 0.073 0.967
silver_price_trend -0.6082 0.195 -3.120 0.002 -0.991 -0.225
Palladium_price_trend 0.1655 0.331 0.500 0.617 -0.485 0.816
Platinum_price_trend 2.1194 0.262 8.075 0.000 1.604 2.635
Gold_ETF_Return 321.6001 184.981 1.739 0.083 -41.856 685.056
Silver_Return -211.3560 95.496 -2.213 0.027 -398.989 -23.723
Platinum_Return 379.9889 85.352 4.452 0.000 212.287 547.691
Palladium_Return -184.0042 58.902 -3.124 0.002 -299.736 -68.273
==============================================================================
Omnibus: 0.938 Durbin-Watson: 0.416
Prob(Omnibus): 0.626 Jarque-Bera (JB): 0.996
Skew: 0.035 Prob(JB): 0.608
Kurtosis: 2.796 Cond. No. 1.41e+06
==============================================================================
Notes:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
[2] The condition number is large, 1.41e+06. This might indicate that there are
strong multicollinearity or other numerical problems.
C:\ProgramData\Anaconda3\lib\site-packages\statsmodels\tsa\tsatools.py:142: FutureWarning: In a future version of pandas all arguments of concat except for the argument 'objs' will be keyword-only
# Fit a random forest to rank feature importance for the Platinum target.
regressor = RandomForestRegressor(n_estimators=500, random_state=0)
# fit the regressor with x and y data
regressor.fit(training_x, training_y)
pred = regressor.predict(testing_x)
importances = regressor.feature_importances_
importances
# Create a DataFrame using a Dictionary
data = {'feature_names': training_x.columns, 'feature_importance': importances}
fi_df = pd.DataFrame(data)
fi_df
# Sort the DataFrame in order decreasing feature importance
fi_df.sort_values(by=['feature_importance'], ascending=False, inplace=True)
# Define size of bar plot
plt.figure(figsize=(10, 8))
# BUG FIX: the original passed ALL importances as x but only the top-10
# names as y, so the vectors had different lengths and the bars/labels
# were misaligned; slice both consistently to the top 10 features.
top_features = fi_df.head(10)
sns.barplot(x=top_features['feature_importance'], y=top_features['feature_names'])
# Add chart labels
plt.title('FEATURE IMPORTANCE')
plt.xlabel('FEATURE IMPORTANCE')
plt.ylabel('FEATURE NAMES')
Text(0, 0.5, 'FEATURE NAMES')
# Inspect remaining column names to choose which to drop next.
new_df.columns
Index(['Gold_ETF', 'S&P_500', 'Silver', 'Platinum', 'Palladium', 'Crude_oil',
'Euro_USD_Exchange_Rate', 'EGO', 'AU', 'ABX', 'BVN', 'CPI',
'Covid_deaths', 'Interest_rate_USA', 'Interest_rate_UK',
'Disease_volitaly_tracker', 'Gold_price_trend', 'Covid_economic_impact',
'Stock_maket_news', 'Stock_market_crash', 'silver_price_trend',
'Palladium_price_trend', 'Platinum_price_trend', 'Gold_ETF_Return',
'Silver_Return', 'Platinum_Return', 'Palladium_Return'],
dtype='object')
col_to_remove = ['Palladium_price_trend','Disease_volitaly_tracker']
new_df_platinum.drop(col_to_remove,axis=1,inplace=True)
scaler = MinMaxScaler()
feature_minmax_transform_data = scaler.fit_transform(new_df_platinum)
feature_minmax_transform = pd.DataFrame(columns=new_df_platinum.columns, data=feature_minmax_transform_data, index=new_df_platinum.index)
feature_minmax_transform.head()
transformed_df = feature_minmax_transform
test_size = int(0.3 * len(transformed_df))
train_size = len(transformed_df) - test_size
training_x = transformed_df[:train_size].drop('Platinum',axis=1)
training_y = transformed_df[:train_size]['Platinum']
testing_x = transformed_df[train_size:].drop(['Platinum'], axis=1)
testing_y = transformed_df[train_size:]['Platinum']
X2 = sm.add_constant(training_x)
est = sm.OLS(training_y, X2)
est2 = est.fit()
print(est2.summary())
OLS Regression Results
==============================================================================
Dep. Variable: Platinum R-squared: 0.957
Model: OLS Adj. R-squared: 0.955
Method: Least Squares F-statistic: 455.9
Date: Mon, 25 Jul 2022 Prob (F-statistic): 9.59e-318
Time: 21:18:50 Log-Likelihood: 908.62
No. Observations: 516 AIC: -1767.
Df Residuals: 491 BIC: -1661.
Df Model: 24
Covariance Type: nonrobust
==========================================================================================
coef std err t P>|t| [0.025 0.975]
------------------------------------------------------------------------------------------
const 0.0437 0.056 0.785 0.433 -0.066 0.153
Gold_ETF -0.4001 0.068 -5.918 0.000 -0.533 -0.267
S&P_500 0.2754 0.068 4.049 0.000 0.142 0.409
Silver 0.8290 0.041 20.029 0.000 0.748 0.910
Palladium 0.3390 0.023 14.477 0.000 0.293 0.385
Crude_oil 0.1121 0.083 1.343 0.180 -0.052 0.276
Euro_USD_Exchange_Rate -0.1901 0.038 -5.049 0.000 -0.264 -0.116
EGO 0.0509 0.024 2.114 0.035 0.004 0.098
AU 0.3765 0.036 10.442 0.000 0.306 0.447
ABX -0.2580 0.038 -6.720 0.000 -0.333 -0.183
BVN -0.2676 0.026 -10.469 0.000 -0.318 -0.217
CPI -0.6378 0.086 -7.375 0.000 -0.808 -0.468
Covid_deaths 0.1564 0.019 8.443 0.000 0.120 0.193
Interest_rate_USA 0.2633 0.052 5.024 0.000 0.160 0.366
Interest_rate_UK 0.1599 0.068 2.359 0.019 0.027 0.293
Gold_price_trend -0.0441 0.021 -2.078 0.038 -0.086 -0.002
Covid_economic_impact -0.1392 0.029 -4.877 0.000 -0.195 -0.083
Stock_maket_news -0.0468 0.013 -3.489 0.001 -0.073 -0.020
Stock_market_crash 0.0763 0.028 2.693 0.007 0.021 0.132
silver_price_trend -0.0791 0.026 -3.082 0.002 -0.130 -0.029
Platinum_price_trend 0.1887 0.022 8.476 0.000 0.145 0.232
Gold_ETF_Return 0.0516 0.029 1.782 0.075 -0.005 0.109
Silver_Return -0.0847 0.038 -2.253 0.025 -0.159 -0.011
Platinum_Return 0.1278 0.029 4.450 0.000 0.071 0.184
Palladium_Return -0.1241 0.039 -3.204 0.001 -0.200 -0.048
==============================================================================
Omnibus: 0.988 Durbin-Watson: 0.417
Prob(Omnibus): 0.610 Jarque-Bera (JB): 1.030
Skew: 0.027 Prob(JB): 0.598
Kurtosis: 2.788 Cond. No. 124.
==============================================================================
Notes:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
C:\ProgramData\Anaconda3\lib\site-packages\statsmodels\tsa\tsatools.py:142: FutureWarning: In a future version of pandas all arguments of concat except for the argument 'objs' will be keyword-only
# Extract the DatetimeIndex as a Series to use as the plot x-axis.
date_df = pd.DataFrame(transformed_df["Platinum"])
date_df['Date'] = date_df.index
date_df.drop(columns="Platinum",inplace=True)
date_df= date_df.squeeze()
float_data = np.array(transformed_df.values.tolist())
# Train-Test Split
# NOTE(review): `split` is computed but never used — train and test are
# both the FULL dataset, so the LSTM below is evaluated on data it was
# trained on. Confirm this is intentional.
nn_metal_data = float_data
split_percent = 0.70
split = int(split_percent*len(nn_metal_data))
nn_metal_train = nn_metal_data
nn_metal_test = nn_metal_data
date_train = date_df
date_test = date_df
print(len(nn_metal_train))
print(len(nn_metal_test))
737 737
look_back =3 #lags
numberOfCols = len(transformed_df.columns)
#train and test generator
# Each sample: look_back consecutive feature rows -> the next full row
# (targets == inputs, so the network predicts every column one step ahead).
train_generator = TimeseriesGenerator(nn_metal_train, nn_metal_train, length=look_back, batch_size=1)
test_generator = TimeseriesGenerator(nn_metal_test, nn_metal_test, length=look_back, batch_size=1)
# Stacked LSTM/GRU network mapping look_back rows of all features to the
# next full feature row (numberOfCols outputs), trained with MSE/Adam.
model = Sequential()
model.add(
    LSTM(32,
         activation='relu',
         input_shape=(look_back, numberOfCols),
         return_sequences=True)
)
# input_shape on the layers below was ignored by Keras (only the first
# layer's matters), so it is dropped here for clarity.
model.add(
    layers.GRU(64,
               activation='relu',
               activity_regularizer=regularizers.l2(0.01),
               return_sequences=True)
)
model.add(
    layers.GRU(32,
               activation='relu',
               return_sequences=True)
)
model.add(
    layers.LSTM(80,
                activation='relu')
)
model.add(Dense(numberOfCols))
model.compile(optimizer='adam', loss='mse')
num_epochs = 25
# FIX: Model.fit_generator is deprecated (it raised a UserWarning in the
# original run); Model.fit accepts generators directly.
model.fit(train_generator, epochs=num_epochs, verbose=1)
C:\Users\LOCAL_~1\Temp/ipykernel_10088/1287865284.py:31: UserWarning: `Model.fit_generator` is deprecated and will be removed in a future version. Please use `Model.fit`, which supports generators.
Epoch 1/25 734/734 [==============================] - 8s 5ms/step - loss: 0.0340 Epoch 2/25 734/734 [==============================] - 4s 5ms/step - loss: 0.0136 Epoch 3/25 734/734 [==============================] - 4s 5ms/step - loss: 0.0104 Epoch 4/25 734/734 [==============================] - 4s 5ms/step - loss: 0.0095 Epoch 5/25 734/734 [==============================] - 4s 5ms/step - loss: 0.0085 Epoch 6/25 734/734 [==============================] - 4s 5ms/step - loss: 0.0080 Epoch 7/25 734/734 [==============================] - 4s 5ms/step - loss: 0.0074 Epoch 8/25 734/734 [==============================] - 4s 5ms/step - loss: 0.0072 Epoch 9/25 734/734 [==============================] - 4s 5ms/step - loss: 0.0069 Epoch 10/25 734/734 [==============================] - 4s 5ms/step - loss: 0.0065 Epoch 11/25 734/734 [==============================] - 4s 5ms/step - loss: 0.0064 Epoch 12/25 734/734 [==============================] - 4s 6ms/step - loss: 0.0064 Epoch 13/25 734/734 [==============================] - 4s 5ms/step - loss: 0.0060 Epoch 14/25 734/734 [==============================] - 4s 5ms/step - loss: 0.0059 Epoch 15/25 734/734 [==============================] - 4s 5ms/step - loss: 0.0060 Epoch 16/25 734/734 [==============================] - 4s 6ms/step - loss: 0.0056 Epoch 17/25 734/734 [==============================] - 4s 5ms/step - loss: 0.0055 Epoch 18/25 734/734 [==============================] - 4s 5ms/step - loss: 0.0056 Epoch 19/25 734/734 [==============================] - 4s 5ms/step - loss: 0.0054 Epoch 20/25 734/734 [==============================] - 5s 7ms/step - loss: 0.0053A: 0s Epoch 21/25 734/734 [==============================] - 5s 7ms/step - loss: 0.0054 Epoch 22/25 734/734 [==============================] - 4s 6ms/step - loss: 0.0051 Epoch 23/25 734/734 [==============================] - 4s 6ms/step - loss: 0.0050 Epoch 24/25 734/734 [==============================] - 4s 5ms/step - loss: 0.0049 Epoch 25/25 734/734 
[==============================] - 4s 5ms/step - loss: 0.0048
<keras.callbacks.History at 0x21666aeda60>
# Un-scale train/test arrays and predictions, then flatten and keep every
# numberOfCols-th value starting at offset 3 — presumably the Platinum
# column's position in transformed_df (TODO confirm against column order).
nn_metal_train = scaler.inverse_transform(nn_metal_train)
nn_metal_test = scaler.inverse_transform(nn_metal_test)
prediction = model.predict(test_generator)
nn_metal_train = nn_metal_train.reshape((-1))
nn_metal_train = nn_metal_train[3::numberOfCols]
nn_metal_test = nn_metal_test.reshape((-1))
nn_metal_test = nn_metal_test[3::numberOfCols]
prediction = scaler.inverse_transform(prediction)
prediction = prediction.reshape((-1))
prediction = prediction[3::numberOfCols]
# Overlay the training series, the model's test-set predictions, and the
# actual test values on a single interactive chart, then report accuracy.
data_trace = go.Scatter(x=date_train, y=nn_metal_train,
                        mode='lines', name='Data')
pred_trace = go.Scatter(x=date_test, y=prediction,
                        mode='lines', name='Predicted Value')
actual_trace = go.Scatter(x=date_test, y=nn_metal_test,
                          mode='lines', name='Actual values')
fig = go.Figure(
    data=[data_trace, pred_trace, actual_trace],
    layout=go.Layout(title="Price Predictions",
                     xaxis={'title': "Date"},
                     yaxis={'title': "Value"}),
)
fig.show()
forecast_accuracy(prediction, nn_metal_test[:734])
{'me': 3.995765873132025,
'mae': 29.144590073255497,
'rmse': 37.77006171478542,
'mse': 1426.5775619386995}
# Restore the 2-D (rows, features) layout before iterative forecasting.
nn_metal_data = nn_metal_data.reshape((-1,numberOfCols))
def predict(num_prediction, model):
    """Iteratively forecast `num_prediction` future Platinum steps.

    Seeds with the last `look_back` rows of the scaled global
    `nn_metal_data`, feeds the model its own multi-column output at each
    step, then inverse-transforms and extracts column offset 3 (presumably
    Platinum's position — TODO confirm against transformed_df column order).

    Parameters
    ----------
    num_prediction : int
        Number of future steps to generate.
    model : keras.Model
        Trained network that outputs one scaled row of `numberOfCols` values.

    Returns
    -------
    numpy.ndarray
        1-D forecast; the first element is the seed value (callers drop it
        via `forecast[1:]`).
    """
    prediction_list = nn_metal_data[-look_back:]
    for _ in range(num_prediction):
        prediction_list = prediction_list.reshape((-1, numberOfCols))
        window = prediction_list[-look_back:].reshape((1, look_back, numberOfCols))
        # FIX: the original called model.predict(window) once per output
        # column (plus one unused call); one call per step yields the same
        # appended values with numberOfCols-fold fewer predictions.
        next_row = model.predict(window)[0]
        prediction_list = np.append(prediction_list, next_row)
    prediction_list = scaler.inverse_transform(prediction_list.reshape((-1, numberOfCols)))
    prediction_list = prediction_list.reshape((-1))
    prediction_list = prediction_list[3::numberOfCols]
    return prediction_list
def predict_dates(num_prediction):
    """Build a daily date axis of num_prediction + 1 entries for the forecast.

    NOTE(review): this starts from the SECOND-to-last observed date
    (`values[-2]`), so the forecast axis overlaps the observed series by
    one day — confirm this is intentional rather than `values[-1]`.
    """
    start = df['Date'].values[-2]
    return pd.date_range(start, periods=num_prediction + 1, freq='D').tolist()
# Generate a 10-step Platinum forecast and prepare both series for plotting.
num_prediction = 10
forecast = predict(num_prediction, model)
forecast_dates = predict_dates(num_prediction)
# Drop the seed value so the forecast contains only future steps.
forecast = forecast[1:]
# Un-scale the full dataset and extract the target column (offset 3).
nn_metal_data = scaler.inverse_transform(nn_metal_data)
nn_metal_data = nn_metal_data.reshape((-1))
nn_metal_data = nn_metal_data[3::numberOfCols]
forecast = forecast.reshape((-1))
#forecast_list1 = forecast[:5]
# Plot the historical Platinum series alongside the 10-day forecast.
history_trace = go.Scatter(x=df['Date'], y=nn_metal_data,
                           mode='lines', name='Data')
future_trace = go.Scatter(x=forecast_dates, y=forecast,
                          mode='lines', name='forecast')
fig = go.Figure(
    data=[history_trace, future_trace],
    layout=go.Layout(title="Forecasting Price",
                     xaxis={'title': "Date"},
                     yaxis={'title': "Value"}),
)
fig.show()
# Chronological 70/30 train/test split for the Palladium target.
test_size = int(0.3 * len(new_df_palladium))
train_size = len(new_df_palladium) - test_size
training_x = new_df_palladium[:train_size].drop('Palladium',axis=1)
training_y = new_df_palladium[:train_size]['Palladium']
testing_x = new_df_palladium[train_size:].drop(['Palladium'], axis=1)
testing_y = new_df_palladium[train_size:]['Palladium']
# OLS with an intercept, used to screen regressors by significance.
X2 = sm.add_constant(training_x)
est = sm.OLS(training_y, X2)
est2 = est.fit()
print(est2.summary())
OLS Regression Results
==============================================================================
Dep. Variable: Palladium R-squared: 0.949
Model: OLS Adj. R-squared: 0.947
Method: Least Squares F-statistic: 351.9
Date: Mon, 25 Jul 2022 Prob (F-statistic): 1.91e-297
Time: 21:20:46 Log-Likelihood: -3061.4
No. Observations: 516 AIC: 6177.
Df Residuals: 489 BIC: 6291.
Df Model: 26
Covariance Type: nonrobust
============================================================================================
coef std err t P>|t| [0.025 0.975]
--------------------------------------------------------------------------------------------
const -8633.3808 1342.971 -6.429 0.000 -1.13e+04 -5994.675
Gold_ETF 1.7358 0.223 7.781 0.000 1.298 2.174
S&P_500 0.3205 0.058 5.533 0.000 0.207 0.434
Silver -44.9547 6.664 -6.745 0.000 -58.049 -31.860
Platinum 1.4454 0.127 11.374 0.000 1.196 1.695
Crude_oil -5.9490 1.137 -5.231 0.000 -8.183 -3.715
Euro_USD_Exchange_Rate -545.0489 382.652 -1.424 0.155 -1296.895 206.797
EGO 10.2883 5.814 1.770 0.077 -1.135 21.712
AU -15.7094 3.735 -4.206 0.000 -23.049 -8.370
ABX -4.8992 4.676 -1.048 0.295 -14.087 4.288
BVN 28.4972 5.311 5.365 0.000 18.062 38.933
CPI 28.6183 5.096 5.616 0.000 18.606 38.631
Covid_deaths -0.0403 0.009 -4.322 0.000 -0.059 -0.022
Interest_rate_USA -156.5867 64.828 -2.415 0.016 -283.962 -29.212
Interest_rate_UK 49.3423 54.574 0.904 0.366 -57.886 156.570
Disease_volitaly_tracker 0.7730 0.703 1.099 0.272 -0.609 2.155
Gold_price_trend -1.1202 0.625 -1.791 0.074 -2.349 0.109
Covid_economic_impact 2.2294 0.643 3.466 0.001 0.966 3.493
Stock_maket_news 2.1449 0.283 7.586 0.000 1.589 2.700
Stock_market_crash -1.5259 0.719 -2.121 0.034 -2.939 -0.112
silver_price_trend -1.9556 0.616 -3.176 0.002 -3.166 -0.746
Palladium_price_trend 11.8981 0.898 13.255 0.000 10.134 13.662
Platinum_price_trend -6.6230 0.831 -7.973 0.000 -8.255 -4.991
Gold_ETF_Return -1255.5072 583.604 -2.151 0.032 -2402.189 -108.826
Silver_Return 378.9152 302.798 1.251 0.211 -216.030 973.861
Platinum_Return -611.9661 273.736 -2.236 0.026 -1149.811 -74.121
Palladium_Return 1015.8034 182.284 5.573 0.000 657.648 1373.959
==============================================================================
Omnibus: 9.482 Durbin-Watson: 0.452
Prob(Omnibus): 0.009 Jarque-Bera (JB): 9.479
Skew: 0.307 Prob(JB): 0.00874
Kurtosis: 3.253 Cond. No. 1.34e+06
==============================================================================
Notes:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
[2] The condition number is large, 1.34e+06. This might indicate that there are
strong multicollinearity or other numerical problems.
C:\ProgramData\Anaconda3\lib\site-packages\statsmodels\tsa\tsatools.py:142: FutureWarning: In a future version of pandas all arguments of concat except for the argument 'objs' will be keyword-only
# Fit a random forest to rank feature importance for the Palladium target.
regressor = RandomForestRegressor(n_estimators=500, random_state=0)
# fit the regressor with x and y data
regressor.fit(training_x, training_y)
pred = regressor.predict(testing_x)
importances = regressor.feature_importances_
importances
# Create a DataFrame using a Dictionary
data = {'feature_names': training_x.columns, 'feature_importance': importances}
fi_df = pd.DataFrame(data)
fi_df
# Sort the DataFrame in order decreasing feature importance
fi_df.sort_values(by=['feature_importance'], ascending=False, inplace=True)
# Define size of bar plot
plt.figure(figsize=(10, 8))
# BUG FIX: the original passed ALL importances as x but only the top-10
# names as y, so the vectors had different lengths and the bars/labels
# were misaligned; slice both consistently to the top 10 features.
top_features = fi_df.head(10)
sns.barplot(x=top_features['feature_importance'], y=top_features['feature_names'])
# Add chart labels
plt.title('FEATURE IMPORTANCE')
plt.xlabel('FEATURE IMPORTANCE')
plt.ylabel('FEATURE NAMES')
Text(0, 0.5, 'FEATURE NAMES')
# Inspect the column set of the working dataframe (repr shown in the cell output below).
new_df.columns
Index(['Gold_ETF', 'S&P_500', 'Silver', 'Platinum', 'Palladium', 'Crude_oil',
'Euro_USD_Exchange_Rate', 'EGO', 'AU', 'ABX', 'BVN', 'CPI',
'Covid_deaths', 'Interest_rate_USA', 'Interest_rate_UK',
'Disease_volitaly_tracker', 'Gold_price_trend', 'Covid_economic_impact',
'Stock_maket_news', 'Stock_market_crash', 'silver_price_trend',
'Palladium_price_trend', 'Platinum_price_trend', 'Gold_ETF_Return',
'Silver_Return', 'Platinum_Return', 'Palladium_Return'],
dtype='object')
# --- Feature preparation for the palladium model ---
# Drop the disease-volatility tracker column before modelling.
col_to_remove = ['Disease_volitaly_tracker']
new_df_palladium.drop(col_to_remove, axis=1, inplace=True)

# Chronological 70/30 split sizes, computed BEFORE scaling.
test_size = int(0.3 * len(new_df_palladium))
train_size = len(new_df_palladium) - test_size

# LEAKAGE FIX: the original fit the MinMaxScaler on the entire frame, so
# test-set minima/maxima leaked into the training features. Fit on the
# training rows only, then transform everything with those statistics
# (test values may fall slightly outside [0, 1], which is expected).
scaler = MinMaxScaler()
scaler.fit(new_df_palladium.iloc[:train_size])
feature_minmax_transform_data = scaler.transform(new_df_palladium)
feature_minmax_transform = pd.DataFrame(columns=new_df_palladium.columns,
                                        data=feature_minmax_transform_data,
                                        index=new_df_palladium.index)
feature_minmax_transform.head()
transformed_df = feature_minmax_transform

# Chronological train/test split; 'Palladium' is the regression target.
training_x = transformed_df[:train_size].drop('Palladium', axis=1)
training_y = transformed_df[:train_size]['Palladium']
testing_x = transformed_df[train_size:].drop(['Palladium'], axis=1)
testing_y = transformed_df[train_size:]['Palladium']

# OLS with an intercept on the scaled training data to gauge which
# features carry linear signal for palladium.
X2 = sm.add_constant(training_x)
est = sm.OLS(training_y, X2)
est2 = est.fit()
print(est2.summary())
OLS Regression Results
==============================================================================
Dep. Variable: Palladium R-squared: 0.949
Model: OLS Adj. R-squared: 0.947
Method: Least Squares F-statistic: 365.8
Date: Mon, 25 Jul 2022 Prob (F-statistic): 1.80e-298
Time: 21:20:49 Log-Likelihood: 740.50
No. Observations: 516 AIC: -1429.
Df Residuals: 490 BIC: -1319.
Df Model: 25
Covariance Type: nonrobust
==========================================================================================
coef std err t P>|t| [0.025 0.975]
------------------------------------------------------------------------------------------
const 0.0944 0.077 1.220 0.223 -0.058 0.246
Gold_ETF 0.7072 0.092 7.711 0.000 0.527 0.887
S&P_500 0.5087 0.093 5.464 0.000 0.326 0.692
Silver -0.4988 0.074 -6.722 0.000 -0.645 -0.353
Platinum 0.6341 0.056 11.395 0.000 0.525 0.743
Crude_oil -0.6249 0.114 -5.471 0.000 -0.849 -0.400
Euro_USD_Exchange_Rate -0.0656 0.054 -1.215 0.225 -0.172 0.040
EGO 0.0625 0.034 1.859 0.064 -0.004 0.129
AU -0.2330 0.054 -4.286 0.000 -0.340 -0.126
ABX -0.0596 0.056 -1.072 0.284 -0.169 0.050
BVN 0.2007 0.038 5.251 0.000 0.126 0.276
CPI 0.7155 0.128 5.577 0.000 0.463 0.968
Covid_deaths -0.1249 0.027 -4.624 0.000 -0.178 -0.072
Interest_rate_USA -0.1961 0.074 -2.649 0.008 -0.342 -0.051
Interest_rate_UK 0.0982 0.095 1.033 0.302 -0.089 0.285
Gold_price_trend -0.0531 0.030 -1.752 0.080 -0.113 0.006
Covid_economic_impact 0.1474 0.040 3.678 0.000 0.069 0.226
Stock_maket_news 0.1363 0.018 7.655 0.000 0.101 0.171
Stock_market_crash -0.0800 0.042 -1.901 0.058 -0.163 0.003
silver_price_trend -0.1155 0.036 -3.200 0.001 -0.186 -0.045
Palladium_price_trend 0.6802 0.051 13.219 0.000 0.579 0.781
Platinum_price_trend -0.2523 0.032 -7.916 0.000 -0.315 -0.190
Gold_ETF_Return -0.0867 0.040 -2.157 0.031 -0.166 -0.008
Silver_Return 0.0704 0.052 1.345 0.179 -0.032 0.173
Platinum_Return -0.0915 0.040 -2.262 0.024 -0.171 -0.012
Palladium_Return 0.3046 0.053 5.797 0.000 0.201 0.408
==============================================================================
Omnibus: 10.352 Durbin-Watson: 0.445
Prob(Omnibus): 0.006 Jarque-Bera (JB): 10.443
Skew: 0.319 Prob(JB): 0.00540
Kurtosis: 3.283 Cond. No. 133.
==============================================================================
Notes:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
C:\ProgramData\Anaconda3\lib\site-packages\statsmodels\tsa\tsatools.py:142: FutureWarning: In a future version of pandas all arguments of concat except for the argument 'objs' will be keyword-only
# Build a Series of dates (the index of transformed_df) for the plotly x-axes.
date_df = pd.DataFrame(transformed_df["Palladium"])
date_df['Date'] = date_df.index
date_df.drop(columns="Palladium",inplace=True)
date_df= date_df.squeeze()
# Scaled feature matrix as a plain 2-D ndarray (rows x features).
float_data = np.array(transformed_df.values.tolist())
# Train-Test Split
nn_metal_data = float_data
split_percent = 0.70
split = int(split_percent*len(nn_metal_data))
# NOTE(review): `split` is computed but never used — train and test are both
# assigned the FULL dataset, so the network below is evaluated on the same
# rows it was trained on. Confirm whether nn_metal_data[:split] / [split:]
# (and the matching date slices) were intended; later cells slice with
# hard-coded lengths (e.g. [:734]) that depend on the full 737-row arrays.
nn_metal_train = nn_metal_data
nn_metal_test = nn_metal_data
date_train = date_df
date_test = date_df
print(len(nn_metal_train))
print(len(nn_metal_test))
737 737
# --- Stacked LSTM/GRU model on lagged feature windows ---
look_back = 3  # number of lagged timesteps (lags) fed to the network
numberOfCols = len(transformed_df.columns)

# Sliding-window generators: each sample is `look_back` consecutive rows,
# and the target is the full feature row that follows the window.
train_generator = TimeseriesGenerator(nn_metal_train, nn_metal_train,
                                      length=look_back, batch_size=1)
test_generator = TimeseriesGenerator(nn_metal_test, nn_metal_test,
                                     length=look_back, batch_size=1)

# Architecture: LSTM(32) -> GRU(64, L2 activity reg.) -> GRU(32) -> LSTM(80)
# -> Dense(numberOfCols). Only the first layer needs input_shape; Keras
# infers the rest (the original redundantly repeated it on every layer,
# where it is silently ignored).
model = Sequential()
model.add(
    LSTM(32,
         activation='relu',
         input_shape=(look_back, numberOfCols),
         return_sequences=True)
)
model.add(
    layers.GRU(64,
               activation='relu',
               activity_regularizer=regularizers.l2(0.01),
               return_sequences=True)
)
model.add(
    layers.GRU(32,
               activation='relu',
               return_sequences=True)
)
model.add(
    layers.LSTM(80,
                activation='relu')
)
# One output per feature column: the model predicts the entire next row.
model.add(Dense(numberOfCols))
model.compile(optimizer='adam', loss='mse')
num_epochs = 25
# DEPRECATION FIX: Model.fit_generator is deprecated (see the UserWarning in
# the original run); Model.fit accepts generators directly.
model.fit(train_generator, epochs=num_epochs, verbose=1)
C:\Users\LOCAL_~1\Temp/ipykernel_10088/1287865284.py:31: UserWarning: `Model.fit_generator` is deprecated and will be removed in a future version. Please use `Model.fit`, which supports generators.
Epoch 1/25 734/734 [==============================] - 8s 5ms/step - loss: 0.0366 Epoch 2/25 734/734 [==============================] - 4s 5ms/step - loss: 0.0175 Epoch 3/25 734/734 [==============================] - 4s 5ms/step - loss: 0.0113 Epoch 4/25 734/734 [==============================] - 4s 5ms/step - loss: 0.0096 Epoch 5/25 734/734 [==============================] - 4s 5ms/step - loss: 0.0089 Epoch 6/25 734/734 [==============================] - 4s 5ms/step - loss: 0.0082 Epoch 7/25 734/734 [==============================] - 4s 5ms/step - loss: 0.0079 Epoch 8/25 734/734 [==============================] - 4s 5ms/step - loss: 0.0076 Epoch 9/25 734/734 [==============================] - 4s 5ms/step - loss: 0.0072 Epoch 10/25 734/734 [==============================] - 4s 5ms/step - loss: 0.0069 Epoch 11/25 734/734 [==============================] - 4s 5ms/step - loss: 0.0066 Epoch 12/25 734/734 [==============================] - 4s 5ms/step - loss: 0.0065 Epoch 13/25 734/734 [==============================] - 4s 5ms/step - loss: 0.0064 Epoch 14/25 734/734 [==============================] - 4s 5ms/step - loss: 0.0059 Epoch 15/25 734/734 [==============================] - 4s 5ms/step - loss: 0.0059 Epoch 16/25 734/734 [==============================] - 4s 5ms/step - loss: 0.0060 Epoch 17/25 734/734 [==============================] - 4s 5ms/step - loss: 0.0058 Epoch 18/25 734/734 [==============================] - 4s 6ms/step - loss: 0.0055 Epoch 19/25 734/734 [==============================] - 4s 6ms/step - loss: 0.0055 Epoch 20/25 734/734 [==============================] - 4s 5ms/step - loss: 0.0054 Epoch 21/25 734/734 [==============================] - 4s 5ms/step - loss: 0.0054 Epoch 22/25 734/734 [==============================] - 4s 5ms/step - loss: 0.0053 Epoch 23/25 734/734 [==============================] - 4s 5ms/step - loss: 0.0051 Epoch 24/25 734/734 [==============================] - 4s 5ms/step - loss: 0.0050 Epoch 25/25 734/734 
[==============================] - 4s 5ms/step - loss: 0.0050
<keras.callbacks.History at 0x2165dbaf3a0>
# --- Invert the scaling and visualise model predictions on the test window ---
# Column position of the Palladium target in the scaled matrix (index 4 in
# the original, hard-coded; derive it so the code survives column reordering).
pall_idx = transformed_df.columns.get_loc('Palladium')

# Back to original price units, then keep only the Palladium column.
# (The original flattened each matrix and strided with [4::numberOfCols];
# direct column indexing yields the identical 1-D series and is clearer.)
nn_metal_train = scaler.inverse_transform(nn_metal_train)[:, pall_idx]
nn_metal_test = scaler.inverse_transform(nn_metal_test)[:, pall_idx]

# The model emits one full feature row per test sample; un-scale and keep
# the Palladium component for plotting/metrics.
prediction = model.predict(test_generator)
prediction = scaler.inverse_transform(prediction)[:, pall_idx]

trace1 = go.Scatter(
    x = date_train,
    y = nn_metal_train,
    mode = 'lines',
    name = 'Data'
)
trace2 = go.Scatter(
    x = date_test,
    y = prediction,
    mode = 'lines',
    name = 'Predicted Value'
)
trace3 = go.Scatter(
    x = date_test,
    y = nn_metal_test,
    mode='lines',
    name = 'Actual values'
)
layout = go.Layout(
    title = "Price Predictions",
    xaxis = {'title' : "Date"},
    yaxis = {'title' : "Value"}
)
fig = go.Figure(data=[trace1, trace2, trace3], layout=layout)
fig.show()
# Error metrics: the generator yields len(test) - look_back predictions
# (737 - 3 = 734), so compare against the first 734 actual values.
forecast_accuracy(prediction, nn_metal_test[:734])
{'me': 3.013918543511576,
'mae': 80.29563646862229,
'rmse': 113.29686952957792,
'mse': 12836.180645202203}
# Ensure nn_metal_data is 2-D (rows x features) before the recursive
# forecast below; given the shapes produced above this reshape appears to
# be a no-op safeguard — NOTE(review): confirm.
nn_metal_data = nn_metal_data.reshape((-1,numberOfCols))
def predict(num_prediction, model):
    """Recursively forecast ``num_prediction`` steps ahead.

    Starting from the last ``look_back`` scaled rows of ``nn_metal_data``,
    each iteration feeds the current window to ``model``, appends the
    predicted full feature row, and slides the window forward. The
    accumulated rows are then un-scaled and the Palladium column
    (position 4) is returned in original price units.

    Note: the returned array also contains the ``look_back`` seed rows,
    so its length is ``look_back + num_prediction``.
    """
    prediction_list = nn_metal_data[-look_back:]
    for _ in range(num_prediction):
        prediction_list = prediction_list.reshape((-1, numberOfCols))
        x = prediction_list[-look_back:]
        x = x.reshape((1, look_back, numberOfCols))
        # PERF FIX: the original called model.predict(x) once per output
        # column inside an inner loop (numberOfCols + 1 network calls per
        # step); one call per step yields the identical deterministic row.
        next_row = model.predict(x)[0]
        prediction_list = np.append(prediction_list, next_row)
    # Back to original units, then keep only the Palladium column.
    prediction_list = scaler.inverse_transform(prediction_list.reshape((-1, numberOfCols)))
    prediction_list = prediction_list.reshape((-1))
    prediction_list = prediction_list[4::numberOfCols]
    return prediction_list
def predict_dates(num_prediction):
    """Return ``num_prediction + 1`` consecutive daily dates for the forecast.

    The range starts at the second-to-last date in ``df`` — NOTE(review):
    ``[-2]`` looks deliberate (overlap with the last observation?) but
    confirm ``[-1]`` was not intended.
    """
    start = df['Date'].values[-2]
    return list(pd.date_range(start, periods=num_prediction + 1, freq='D'))
# --- Forecast the next 10 days and plot them against full history ---
num_prediction = 10
forecast = predict(num_prediction, model)
forecast_dates = predict_dates(num_prediction)
# Discard the first returned value (NOTE(review): predict() also returns
# the look_back seed rows, so forecast may still be longer than
# forecast_dates — plotly silently truncates; confirm the alignment).
forecast = forecast[1:]

# Un-scale the full history and keep the Palladium column (index 4) for
# the history trace.
nn_metal_data = scaler.inverse_transform(nn_metal_data)
nn_metal_data = nn_metal_data.reshape((-1))[4::numberOfCols]
forecast = forecast.reshape((-1))

history_trace = go.Scatter(x=df['Date'],
                           y=nn_metal_data,
                           mode='lines',
                           name='Data')
forecast_trace = go.Scatter(x=forecast_dates,
                            y=forecast,
                            mode='lines',
                            name='forecast')
layout = go.Layout(title="Forecasting Price",
                   xaxis={'title': "Date"},
                   yaxis={'title': "Value"})
fig = go.Figure(data=[history_trace, forecast_trace], layout=layout)
fig.show()